//----------------------------------------------------------------------------
//
// TSDuck - The MPEG Transport Stream Toolkit
// Copyright (c) 2005-2022, Thierry Lelegard
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//
//----------------------------------------------------------------------------

#include "tsUChar.h"
#include "tsUString.h"
#include "tsAlgorithm.h"
#include "tsSingletonManager.h"


//----------------------------------------------------------------------------
// The macro MAP_SINGLETON(classname, key_type, value_type) defines a
// singleton class named 'classname' which publicly inherits from
// std::map<key_type, value_type>.
//
// The singleton machinery is delegated to two other macros:
// TS_DECLARE_SINGLETON, inside the class body, and TS_DEFINE_SINGLETON,
// after the class body. In this file, the unique instance of a generated
// class is obtained through classname::Instance().
//
// The class also declares the member type 'SuperClass', an alias for its
// std::map base class, so that the constructor can name the base class in
// its member initializer list.
//
// The constructor needs to be separately defined: each use of the macro is
// followed by a definition of classname::classname() which fills the map.
//
// Maintenance note: the macro body is made of backslash-continued lines,
// do not insert '//' comments inside it.
//----------------------------------------------------------------------------

#define MAP_SINGLETON(classname, key_type, value_type)      \
    class classname : public std::map<key_type, value_type> \
    {                                                       \
        TS_DECLARE_SINGLETON(classname);                    \
        typedef std::map<key_type, value_type> SuperClass;  \
    };                                                      \
    TS_DEFINE_SINGLETON(classname)


//----------------------------------------------------------------------------
// Map uppercase => lowercase.
//----------------------------------------------------------------------------

namespace {
    using namespace ts;
    MAP_SINGLETON(UpperLower, UChar, UChar);

    // Constructor of the uppercase => lowercase singleton map.
    // The map is filled from a static table of {uppercase, lowercase} pairs
    // covering the Latin, Greek and Cyrillic letters which are listed below.
    UpperLower::UpperLower() : SuperClass()
    {
        // Each entry is {uppercase letter, corresponding lowercase letter}.
        static const SuperClass::value_type caseTable[] = {
            // Latin letters (accented and other modified forms).
            {LATIN_CAPITAL_LETTER_A_WITH_GRAVE,        LATIN_SMALL_LETTER_A_WITH_GRAVE},
            {LATIN_CAPITAL_LETTER_A_WITH_ACUTE,        LATIN_SMALL_LETTER_A_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_A_WITH_TILDE,        LATIN_SMALL_LETTER_A_WITH_TILDE},
            {LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,    LATIN_SMALL_LETTER_A_WITH_DIAERESIS},
            {LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,   LATIN_SMALL_LETTER_A_WITH_RING_ABOVE},
            {LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,      LATIN_SMALL_LETTER_C_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_E_WITH_GRAVE,        LATIN_SMALL_LETTER_E_WITH_GRAVE},
            {LATIN_CAPITAL_LETTER_E_WITH_ACUTE,        LATIN_SMALL_LETTER_E_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,    LATIN_SMALL_LETTER_E_WITH_DIAERESIS},
            {LATIN_CAPITAL_LETTER_I_WITH_GRAVE,        LATIN_SMALL_LETTER_I_WITH_GRAVE},
            {LATIN_CAPITAL_LETTER_I_WITH_ACUTE,        LATIN_SMALL_LETTER_I_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS,    LATIN_SMALL_LETTER_I_WITH_DIAERESIS},
            {LATIN_CAPITAL_LETTER_N_WITH_TILDE,        LATIN_SMALL_LETTER_N_WITH_TILDE},
            {LATIN_CAPITAL_LETTER_O_WITH_GRAVE,        LATIN_SMALL_LETTER_O_WITH_GRAVE},
            {LATIN_CAPITAL_LETTER_O_WITH_ACUTE,        LATIN_SMALL_LETTER_O_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_O_WITH_TILDE,        LATIN_SMALL_LETTER_O_WITH_TILDE},
            {LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS,    LATIN_SMALL_LETTER_O_WITH_DIAERESIS},
            {LATIN_CAPITAL_LETTER_O_WITH_STROKE,       LATIN_SMALL_LETTER_O_WITH_STROKE},
            {LATIN_CAPITAL_LETTER_U_WITH_GRAVE,        LATIN_SMALL_LETTER_U_WITH_GRAVE},
            {LATIN_CAPITAL_LETTER_U_WITH_ACUTE,        LATIN_SMALL_LETTER_U_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS,    LATIN_SMALL_LETTER_U_WITH_DIAERESIS},
            {LATIN_CAPITAL_LETTER_Y_WITH_ACUTE,        LATIN_SMALL_LETTER_Y_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_A_WITH_MACRON,       LATIN_SMALL_LETTER_A_WITH_MACRON},
            {LATIN_CAPITAL_LETTER_A_WITH_BREVE,        LATIN_SMALL_LETTER_A_WITH_BREVE},
            {LATIN_CAPITAL_LETTER_A_WITH_OGONEK,       LATIN_SMALL_LETTER_A_WITH_OGONEK},
            {LATIN_CAPITAL_LETTER_C_WITH_ACUTE,        LATIN_SMALL_LETTER_C_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_C_WITH_CARON,        LATIN_SMALL_LETTER_C_WITH_CARON},
            {LATIN_CAPITAL_LETTER_D_WITH_CARON,        LATIN_SMALL_LETTER_D_WITH_CARON},
            {LATIN_CAPITAL_LETTER_D_WITH_STROKE,       LATIN_SMALL_LETTER_D_WITH_STROKE},
            {LATIN_CAPITAL_LETTER_E_WITH_MACRON,       LATIN_SMALL_LETTER_E_WITH_MACRON},
            {LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_E_WITH_OGONEK,       LATIN_SMALL_LETTER_E_WITH_OGONEK},
            {LATIN_CAPITAL_LETTER_E_WITH_CARON,        LATIN_SMALL_LETTER_E_WITH_CARON},
            {LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_G_WITH_BREVE,        LATIN_SMALL_LETTER_G_WITH_BREVE},
            {LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_G_WITH_CEDILLA,      LATIN_SMALL_LETTER_G_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_H_WITH_STROKE,       LATIN_SMALL_LETTER_H_WITH_STROKE},
            {LATIN_CAPITAL_LETTER_I_WITH_TILDE,        LATIN_SMALL_LETTER_I_WITH_TILDE},
            {LATIN_CAPITAL_LETTER_I_WITH_MACRON,       LATIN_SMALL_LETTER_I_WITH_MACRON},
            {LATIN_CAPITAL_LETTER_I_WITH_OGONEK,       LATIN_SMALL_LETTER_I_WITH_OGONEK},
            {LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_K_WITH_CEDILLA,      LATIN_SMALL_LETTER_K_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_L_WITH_ACUTE,        LATIN_SMALL_LETTER_L_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_L_WITH_CEDILLA,      LATIN_SMALL_LETTER_L_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_L_WITH_CARON,        LATIN_SMALL_LETTER_L_WITH_CARON},
            {LATIN_CAPITAL_LETTER_L_WITH_STROKE,       LATIN_SMALL_LETTER_L_WITH_STROKE},
            {LATIN_CAPITAL_LETTER_N_WITH_ACUTE,        LATIN_SMALL_LETTER_N_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_N_WITH_CEDILLA,      LATIN_SMALL_LETTER_N_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_N_WITH_CARON,        LATIN_SMALL_LETTER_N_WITH_CARON},
            {LATIN_CAPITAL_LETTER_O_WITH_MACRON,       LATIN_SMALL_LETTER_O_WITH_MACRON},
            {LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE, LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE},
            {LATIN_CAPITAL_LETTER_R_WITH_ACUTE,        LATIN_SMALL_LETTER_R_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_R_WITH_CEDILLA,      LATIN_SMALL_LETTER_R_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_R_WITH_CARON,        LATIN_SMALL_LETTER_R_WITH_CARON},
            {LATIN_CAPITAL_LETTER_S_WITH_ACUTE,        LATIN_SMALL_LETTER_S_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_S_WITH_CEDILLA,      LATIN_SMALL_LETTER_S_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_S_WITH_CARON,        LATIN_SMALL_LETTER_S_WITH_CARON},
            {LATIN_CAPITAL_LETTER_T_WITH_CEDILLA,      LATIN_SMALL_LETTER_T_WITH_CEDILLA},
            {LATIN_CAPITAL_LETTER_T_WITH_CARON,        LATIN_SMALL_LETTER_T_WITH_CARON},
            {LATIN_CAPITAL_LETTER_T_WITH_STROKE,       LATIN_SMALL_LETTER_T_WITH_STROKE},
            {LATIN_CAPITAL_LETTER_U_WITH_TILDE,        LATIN_SMALL_LETTER_U_WITH_TILDE},
            {LATIN_CAPITAL_LETTER_U_WITH_MACRON,       LATIN_SMALL_LETTER_U_WITH_MACRON},
            {LATIN_CAPITAL_LETTER_U_WITH_BREVE,        LATIN_SMALL_LETTER_U_WITH_BREVE},
            {LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE,   LATIN_SMALL_LETTER_U_WITH_RING_ABOVE},
            {LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE, LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE},
            {LATIN_CAPITAL_LETTER_U_WITH_OGONEK,       LATIN_SMALL_LETTER_U_WITH_OGONEK},
            {LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX,   LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX},
            {LATIN_CAPITAL_LETTER_Z_WITH_ACUTE,        LATIN_SMALL_LETTER_Z_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_Z_WITH_CARON,        LATIN_SMALL_LETTER_Z_WITH_CARON},
            {LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW,  LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW},
            {LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW,  LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW},
            {LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE,    LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE},
            {LATIN_CAPITAL_LETTER_W_WITH_GRAVE,        LATIN_SMALL_LETTER_W_WITH_GRAVE},
            {LATIN_CAPITAL_LETTER_W_WITH_ACUTE,        LATIN_SMALL_LETTER_W_WITH_ACUTE},
            {LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS,    LATIN_SMALL_LETTER_W_WITH_DIAERESIS},
            {LATIN_CAPITAL_LETTER_Y_WITH_GRAVE,        LATIN_SMALL_LETTER_Y_WITH_GRAVE},
            {LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS,    LATIN_SMALL_LETTER_Y_WITH_DIAERESIS},

            // Greek letters.
            {GREEK_CAPITAL_LETTER_ALPHA_WITH_TONOS,    GREEK_SMALL_LETTER_ALPHA_WITH_TONOS},
            {GREEK_CAPITAL_LETTER_EPSILON_WITH_TONOS,  GREEK_SMALL_LETTER_EPSILON_WITH_TONOS},
            {GREEK_CAPITAL_LETTER_ETA_WITH_TONOS,      GREEK_SMALL_LETTER_ETA_WITH_TONOS},
            {GREEK_CAPITAL_LETTER_IOTA_WITH_TONOS,     GREEK_SMALL_LETTER_IOTA_WITH_TONOS},
            {GREEK_CAPITAL_LETTER_ALPHA,               GREEK_SMALL_LETTER_ALPHA},
            {GREEK_CAPITAL_LETTER_BETA,                GREEK_SMALL_LETTER_BETA},
            {GREEK_CAPITAL_LETTER_GAMMA,               GREEK_SMALL_LETTER_GAMMA},
            {GREEK_CAPITAL_LETTER_DELTA,               GREEK_SMALL_LETTER_DELTA},
            {GREEK_CAPITAL_LETTER_EPSILON,             GREEK_SMALL_LETTER_EPSILON},
            {GREEK_CAPITAL_LETTER_ZETA,                GREEK_SMALL_LETTER_ZETA},
            {GREEK_CAPITAL_LETTER_ETA,                 GREEK_SMALL_LETTER_ETA},
            {GREEK_CAPITAL_LETTER_THETA,               GREEK_SMALL_LETTER_THETA},
            {GREEK_CAPITAL_LETTER_IOTA,                GREEK_SMALL_LETTER_IOTA},
            {GREEK_CAPITAL_LETTER_KAPPA,               GREEK_SMALL_LETTER_KAPPA},
            {GREEK_CAPITAL_LETTER_LAMDA,               GREEK_SMALL_LETTER_LAMDA},
            {GREEK_CAPITAL_LETTER_MU,                  GREEK_SMALL_LETTER_MU},
            {GREEK_CAPITAL_LETTER_NU,                  GREEK_SMALL_LETTER_NU},
            {GREEK_CAPITAL_LETTER_XI,                  GREEK_SMALL_LETTER_XI},
            {GREEK_CAPITAL_LETTER_OMICRON,             GREEK_SMALL_LETTER_OMICRON},
            {GREEK_CAPITAL_LETTER_PI,                  GREEK_SMALL_LETTER_PI},
            {GREEK_CAPITAL_LETTER_RHO,                 GREEK_SMALL_LETTER_RHO},
            {GREEK_CAPITAL_LETTER_SIGMA,               GREEK_SMALL_LETTER_SIGMA},
            {GREEK_CAPITAL_LETTER_TAU,                 GREEK_SMALL_LETTER_TAU},
            {GREEK_CAPITAL_LETTER_UPSILON,             GREEK_SMALL_LETTER_UPSILON},
            {GREEK_CAPITAL_LETTER_PHI,                 GREEK_SMALL_LETTER_PHI},
            {GREEK_CAPITAL_LETTER_CHI,                 GREEK_SMALL_LETTER_CHI},
            {GREEK_CAPITAL_LETTER_PSI,                 GREEK_SMALL_LETTER_PSI},
            {GREEK_CAPITAL_LETTER_OMEGA,               GREEK_SMALL_LETTER_OMEGA},
            {GREEK_CAPITAL_LETTER_OMICRON_WITH_TONOS,  GREEK_SMALL_LETTER_OMICRON_WITH_TONOS},
            {GREEK_CAPITAL_LETTER_UPSILON_WITH_TONOS,  GREEK_SMALL_LETTER_UPSILON_WITH_TONOS},
            {GREEK_CAPITAL_LETTER_OMEGA_WITH_TONOS,    GREEK_SMALL_LETTER_OMEGA_WITH_TONOS},
            {GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA, GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA},
            {GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA, GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA},

            // Cyrillic letters.
            {CYRILLIC_CAPITAL_LETTER_A,                CYRILLIC_SMALL_LETTER_A},
            {CYRILLIC_CAPITAL_LETTER_BE,               CYRILLIC_SMALL_LETTER_BE},
            {CYRILLIC_CAPITAL_LETTER_VE,               CYRILLIC_SMALL_LETTER_VE},
            {CYRILLIC_CAPITAL_LETTER_GHE,              CYRILLIC_SMALL_LETTER_GHE},
            {CYRILLIC_CAPITAL_LETTER_DE,               CYRILLIC_SMALL_LETTER_DE},
            {CYRILLIC_CAPITAL_LETTER_IE,               CYRILLIC_SMALL_LETTER_IE},
            {CYRILLIC_CAPITAL_LETTER_ZHE,              CYRILLIC_SMALL_LETTER_ZHE},
            {CYRILLIC_CAPITAL_LETTER_ZE,               CYRILLIC_SMALL_LETTER_ZE},
            {CYRILLIC_CAPITAL_LETTER_I,                CYRILLIC_SMALL_LETTER_I},
            {CYRILLIC_CAPITAL_LETTER_SHORT_I,          CYRILLIC_SMALL_LETTER_SHORT_I},
            {CYRILLIC_CAPITAL_LETTER_KA,               CYRILLIC_SMALL_LETTER_KA},
            {CYRILLIC_CAPITAL_LETTER_EL,               CYRILLIC_SMALL_LETTER_EL},
            {CYRILLIC_CAPITAL_LETTER_EM,               CYRILLIC_SMALL_LETTER_EM},
            {CYRILLIC_CAPITAL_LETTER_EN,               CYRILLIC_SMALL_LETTER_EN},
            {CYRILLIC_CAPITAL_LETTER_O,                CYRILLIC_SMALL_LETTER_O},
            {CYRILLIC_CAPITAL_LETTER_PE,               CYRILLIC_SMALL_LETTER_PE},
            {CYRILLIC_CAPITAL_LETTER_ER,               CYRILLIC_SMALL_LETTER_ER},
            {CYRILLIC_CAPITAL_LETTER_ES,               CYRILLIC_SMALL_LETTER_ES},
            {CYRILLIC_CAPITAL_LETTER_TE,               CYRILLIC_SMALL_LETTER_TE},
            {CYRILLIC_CAPITAL_LETTER_U,                CYRILLIC_SMALL_LETTER_U},
            {CYRILLIC_CAPITAL_LETTER_EF,               CYRILLIC_SMALL_LETTER_EF},
            {CYRILLIC_CAPITAL_LETTER_HA,               CYRILLIC_SMALL_LETTER_HA},
            {CYRILLIC_CAPITAL_LETTER_TSE,              CYRILLIC_SMALL_LETTER_TSE},
            {CYRILLIC_CAPITAL_LETTER_CHE,              CYRILLIC_SMALL_LETTER_CHE},
            {CYRILLIC_CAPITAL_LETTER_SHA,              CYRILLIC_SMALL_LETTER_SHA},
            {CYRILLIC_CAPITAL_LETTER_SHCHA,            CYRILLIC_SMALL_LETTER_SHCHA},
            {CYRILLIC_CAPITAL_LETTER_HARD_SIGN,        CYRILLIC_SMALL_LETTER_HARD_SIGN},
            {CYRILLIC_CAPITAL_LETTER_YERU,             CYRILLIC_SMALL_LETTER_YERU},
            {CYRILLIC_CAPITAL_LETTER_SOFT_SIGN,        CYRILLIC_SMALL_LETTER_SOFT_SIGN},
            {CYRILLIC_CAPITAL_LETTER_E,                CYRILLIC_SMALL_LETTER_E},
            {CYRILLIC_CAPITAL_LETTER_YU,               CYRILLIC_SMALL_LETTER_YU},
            {CYRILLIC_CAPITAL_LETTER_YA,               CYRILLIC_SMALL_LETTER_YA},
            {CYRILLIC_CAPITAL_LETTER_IO,               CYRILLIC_SMALL_LETTER_IO},
            {CYRILLIC_CAPITAL_LETTER_DJE,              CYRILLIC_SMALL_LETTER_DJE},
            {CYRILLIC_CAPITAL_LETTER_GJE,              CYRILLIC_SMALL_LETTER_GJE},
            {CYRILLIC_CAPITAL_LETTER_UKRAINIAN_IE,     CYRILLIC_SMALL_LETTER_UKRAINIAN_IE},
            {CYRILLIC_CAPITAL_LETTER_DZE,              CYRILLIC_SMALL_LETTER_DZE},
            {CYRILLIC_CAPITAL_LETTER_BYELORUSSIAN_UKRAINIAN_I, CYRILLIC_SMALL_LETTER_BYELORUSSIAN_UKRAINIAN_I},
            {CYRILLIC_CAPITAL_LETTER_YI,               CYRILLIC_SMALL_LETTER_YI},
            {CYRILLIC_CAPITAL_LETTER_JE,               CYRILLIC_SMALL_LETTER_JE},
            {CYRILLIC_CAPITAL_LETTER_LJE,              CYRILLIC_SMALL_LETTER_LJE},
            {CYRILLIC_CAPITAL_LETTER_NJE,              CYRILLIC_SMALL_LETTER_NJE},
            {CYRILLIC_CAPITAL_LETTER_TSHE,             CYRILLIC_SMALL_LETTER_TSHE},
            {CYRILLIC_CAPITAL_LETTER_KJE,              CYRILLIC_SMALL_LETTER_KJE},
            {CYRILLIC_CAPITAL_LETTER_SHORT_U,          CYRILLIC_SMALL_LETTER_SHORT_U},
            {CYRILLIC_CAPITAL_LETTER_DZHE,             CYRILLIC_SMALL_LETTER_DZHE},
        };

        // Load the table into the map, one entry at a time, in table order.
        for (const SuperClass::value_type& entry : caseTable) {
            insert(entry);
        }
    }
}


//----------------------------------------------------------------------------
// Map lowercase => uppercase.
//----------------------------------------------------------------------------

namespace {
    MAP_SINGLETON(LowerUpper, ts::UChar, ts::UChar);

    // Constructor of the lowercase => uppercase singleton map.
    // There is no separate table: the content is derived from the UpperLower
    // singleton by swapping the key and the value of each of its entries.
    LowerUpper::LowerUpper() : SuperClass()
    {
        const UpperLower& upperToLower = *UpperLower::Instance();
        for (const UpperLower::value_type& entry : upperToLower) {
            // entry is (uppercase, lowercase), store (lowercase, uppercase).
            insert(value_type(entry.second, entry.first));
        }
    }
}


//----------------------------------------------------------------------------
// Map accented letter => equivalent character sequence without accent.
//----------------------------------------------------------------------------

namespace {
    using namespace ts;
    MAP_SINGLETON(WithoutAccent, UChar, const char*);

    // Constructor of the singleton map which associates a letter with its
    // replacement text without accent. The replacement is a nul-terminated
    // string literal since some characters are replaced by more than one
    // letter (for instance "AE", "ss" or "oe").
    WithoutAccent::WithoutAccent() : SuperClass()
    {
        // Each entry is {character, replacement string}.
        static const SuperClass::value_type accentTable[] = {
            // Capital letters, followed by the corresponding small letters.
            {LATIN_CAPITAL_LETTER_A_WITH_GRAVE,        "A"},
            {LATIN_CAPITAL_LETTER_A_WITH_ACUTE,        "A"},
            {LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,   "A"},
            {LATIN_CAPITAL_LETTER_A_WITH_TILDE,        "A"},
            {LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,    "A"},
            {LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,   "A"},
            {LATIN_CAPITAL_LETTER_AE,                  "AE"},
            {LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,      "C"},
            {LATIN_CAPITAL_LETTER_E_WITH_GRAVE,        "E"},
            {LATIN_CAPITAL_LETTER_E_WITH_ACUTE,        "E"},
            {LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,   "E"},
            {LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,    "E"},
            {LATIN_CAPITAL_LETTER_I_WITH_GRAVE,        "I"},
            {LATIN_CAPITAL_LETTER_I_WITH_ACUTE,        "I"},
            {LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX,   "I"},
            {LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS,    "I"},
            {LATIN_CAPITAL_LETTER_ETH,                 "E"},
            {LATIN_CAPITAL_LETTER_N_WITH_TILDE,        "N"},
            {LATIN_CAPITAL_LETTER_O_WITH_GRAVE,        "O"},
            {LATIN_CAPITAL_LETTER_O_WITH_ACUTE,        "O"},
            {LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX,   "O"},
            {LATIN_CAPITAL_LETTER_O_WITH_TILDE,        "O"},
            {LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS,    "O"},
            {LATIN_CAPITAL_LETTER_O_WITH_STROKE,       "O"},
            {LATIN_CAPITAL_LETTER_U_WITH_GRAVE,        "U"},
            {LATIN_CAPITAL_LETTER_U_WITH_ACUTE,        "U"},
            {LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX,   "U"},
            {LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS,    "U"},
            {LATIN_CAPITAL_LETTER_Y_WITH_ACUTE,        "Y"},
            {LATIN_CAPITAL_LETTER_THORN,               "T"},
            {LATIN_SMALL_LETTER_SHARP_S,               "ss"},
            {LATIN_SMALL_LETTER_A_WITH_GRAVE,          "a"},
            {LATIN_SMALL_LETTER_A_WITH_ACUTE,          "a"},
            {LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX,     "a"},
            {LATIN_SMALL_LETTER_A_WITH_TILDE,          "a"},
            {LATIN_SMALL_LETTER_A_WITH_DIAERESIS,      "a"},
            {LATIN_SMALL_LETTER_A_WITH_RING_ABOVE,     "a"},
            {LATIN_SMALL_LETTER_AE,                    "ae"},
            {LATIN_SMALL_LETTER_C_WITH_CEDILLA,        "c"},
            {LATIN_SMALL_LETTER_E_WITH_GRAVE,          "e"},
            {LATIN_SMALL_LETTER_E_WITH_ACUTE,          "e"},
            {LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX,     "e"},
            {LATIN_SMALL_LETTER_E_WITH_DIAERESIS,      "e"},
            {LATIN_SMALL_LETTER_I_WITH_GRAVE,          "i"},
            {LATIN_SMALL_LETTER_I_WITH_ACUTE,          "i"},
            {LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX,     "i"},
            {LATIN_SMALL_LETTER_I_WITH_DIAERESIS,      "i"},
            {LATIN_SMALL_LETTER_ETH,                   "e"},
            {LATIN_SMALL_LETTER_N_WITH_TILDE,          "n"},
            {LATIN_SMALL_LETTER_O_WITH_GRAVE,          "o"},
            {LATIN_SMALL_LETTER_O_WITH_ACUTE,          "o"},
            {LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX,     "o"},
            {LATIN_SMALL_LETTER_O_WITH_TILDE,          "o"},
            {LATIN_SMALL_LETTER_O_WITH_DIAERESIS,      "o"},
            {LATIN_SMALL_LETTER_O_WITH_STROKE,         "o"},
            {LATIN_SMALL_LETTER_U_WITH_GRAVE,          "u"},
            {LATIN_SMALL_LETTER_U_WITH_ACUTE,          "u"},
            {LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX,     "u"},
            {LATIN_SMALL_LETTER_U_WITH_DIAERESIS,      "u"},
            {LATIN_SMALL_LETTER_Y_WITH_ACUTE,          "y"},
            {LATIN_SMALL_LETTER_Y_WITH_DIAERESIS,      "y"},

            // Other Latin letters, mostly as capital / small pairs.
            {LATIN_CAPITAL_LETTER_A_WITH_MACRON,       "A"},
            {LATIN_SMALL_LETTER_A_WITH_MACRON,         "a"},
            {LATIN_CAPITAL_LETTER_A_WITH_BREVE,        "A"},
            {LATIN_SMALL_LETTER_A_WITH_BREVE,          "a"},
            {LATIN_CAPITAL_LETTER_A_WITH_OGONEK,       "A"},
            {LATIN_SMALL_LETTER_A_WITH_OGONEK,         "a"},
            {LATIN_CAPITAL_LETTER_C_WITH_ACUTE,        "C"},
            {LATIN_SMALL_LETTER_C_WITH_ACUTE,          "c"},
            {LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX,   "C"},
            {LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX,     "c"},
            {LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE,    "C"},
            {LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE,      "c"},
            {LATIN_CAPITAL_LETTER_C_WITH_CARON,        "C"},
            {LATIN_SMALL_LETTER_C_WITH_CARON,          "c"},
            {LATIN_CAPITAL_LETTER_D_WITH_CARON,        "D"},
            {LATIN_SMALL_LETTER_D_WITH_CARON,          "d"},
            {LATIN_CAPITAL_LETTER_D_WITH_STROKE,       "D"},
            {LATIN_SMALL_LETTER_D_WITH_STROKE,         "d"},
            {LATIN_CAPITAL_LETTER_E_WITH_MACRON,       "E"},
            {LATIN_SMALL_LETTER_E_WITH_MACRON,         "e"},
            {LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE,    "E"},
            {LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE,      "e"},
            {LATIN_CAPITAL_LETTER_E_WITH_OGONEK,       "E"},
            {LATIN_SMALL_LETTER_E_WITH_OGONEK,         "e"},
            {LATIN_CAPITAL_LETTER_E_WITH_CARON,        "E"},
            {LATIN_SMALL_LETTER_E_WITH_CARON,          "e"},
            {LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX,   "G"},
            {LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX,     "g"},
            {LATIN_CAPITAL_LETTER_G_WITH_BREVE,        "G"},
            {LATIN_SMALL_LETTER_G_WITH_BREVE,          "g"},
            {LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE,    "G"},
            {LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE,      "g"},
            {LATIN_CAPITAL_LETTER_G_WITH_CEDILLA,      "G"},
            {LATIN_SMALL_LETTER_G_WITH_CEDILLA,        "g"},
            {LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX,   "H"},
            {LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX,     "h"},
            {LATIN_CAPITAL_LETTER_H_WITH_STROKE,       "H"},
            {LATIN_SMALL_LETTER_H_WITH_STROKE,         "h"},
            {LATIN_CAPITAL_LETTER_I_WITH_TILDE,        "I"},
            {LATIN_SMALL_LETTER_I_WITH_TILDE,          "i"},
            {LATIN_CAPITAL_LETTER_I_WITH_MACRON,       "I"},
            {LATIN_SMALL_LETTER_I_WITH_MACRON,         "i"},
            {LATIN_CAPITAL_LETTER_I_WITH_OGONEK,       "I"},
            {LATIN_SMALL_LETTER_I_WITH_OGONEK,         "i"},
            {LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE,    "I"},
            {LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX,   "J"},
            {LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX,     "j"},
            {LATIN_CAPITAL_LETTER_K_WITH_CEDILLA,      "K"},
            {LATIN_SMALL_LETTER_K_WITH_CEDILLA,        "k"},
            {LATIN_CAPITAL_LETTER_L_WITH_ACUTE,        "L"},
            {LATIN_SMALL_LETTER_L_WITH_ACUTE,          "l"},
            {LATIN_CAPITAL_LETTER_L_WITH_CEDILLA,      "L"},
            {LATIN_SMALL_LETTER_L_WITH_CEDILLA,        "l"},
            {LATIN_CAPITAL_LETTER_L_WITH_CARON,        "L"},
            {LATIN_SMALL_LETTER_L_WITH_CARON,          "l"},
            {LATIN_CAPITAL_LETTER_L_WITH_STROKE,       "L"},
            {LATIN_SMALL_LETTER_L_WITH_STROKE,         "l"},
            {LATIN_CAPITAL_LETTER_N_WITH_ACUTE,        "N"},
            {LATIN_SMALL_LETTER_N_WITH_ACUTE,          "n"},
            {LATIN_CAPITAL_LETTER_N_WITH_CEDILLA,      "N"},
            {LATIN_SMALL_LETTER_N_WITH_CEDILLA,        "n"},
            {LATIN_CAPITAL_LETTER_N_WITH_CARON,        "N"},
            {LATIN_SMALL_LETTER_N_WITH_CARON,          "n"},
            {LATIN_CAPITAL_LETTER_O_WITH_MACRON,       "O"},
            {LATIN_SMALL_LETTER_O_WITH_MACRON,         "o"},
            {LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE, "O"},
            {LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE,   "o"},
            {LATIN_CAPITAL_LIGATURE_OE,                "OE"},
            {LATIN_SMALL_LIGATURE_OE,                  "oe"},
            {LATIN_CAPITAL_LETTER_R_WITH_ACUTE,        "R"},
            {LATIN_SMALL_LETTER_R_WITH_ACUTE,          "r"},
            {LATIN_CAPITAL_LETTER_R_WITH_CEDILLA,      "R"},
            {LATIN_SMALL_LETTER_R_WITH_CEDILLA,        "r"},
            {LATIN_CAPITAL_LETTER_R_WITH_CARON,        "R"},
            {LATIN_SMALL_LETTER_R_WITH_CARON,          "r"},
            {LATIN_CAPITAL_LETTER_S_WITH_ACUTE,        "S"},
            {LATIN_SMALL_LETTER_S_WITH_ACUTE,          "s"},
            {LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX,   "S"},
            {LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX,     "s"},
            {LATIN_CAPITAL_LETTER_S_WITH_CEDILLA,      "S"},
            {LATIN_SMALL_LETTER_S_WITH_CEDILLA,        "s"},
            {LATIN_CAPITAL_LETTER_S_WITH_CARON,        "S"},
            {LATIN_SMALL_LETTER_S_WITH_CARON,          "s"},
            {LATIN_CAPITAL_LETTER_T_WITH_CEDILLA,      "T"},
            {LATIN_SMALL_LETTER_T_WITH_CEDILLA,        "t"},
            {LATIN_CAPITAL_LETTER_T_WITH_CARON,        "T"},
            {LATIN_SMALL_LETTER_T_WITH_CARON,          "t"},
            {LATIN_CAPITAL_LETTER_T_WITH_STROKE,       "T"},
            {LATIN_SMALL_LETTER_T_WITH_STROKE,         "t"},
            {LATIN_CAPITAL_LETTER_U_WITH_TILDE,        "U"},
            {LATIN_SMALL_LETTER_U_WITH_TILDE,          "u"},
            {LATIN_CAPITAL_LETTER_U_WITH_MACRON,       "U"},
            {LATIN_SMALL_LETTER_U_WITH_MACRON,         "u"},
            {LATIN_CAPITAL_LETTER_U_WITH_BREVE,        "U"},
            {LATIN_SMALL_LETTER_U_WITH_BREVE,          "u"},
            {LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE,   "U"},
            {LATIN_SMALL_LETTER_U_WITH_RING_ABOVE,     "u"},
            {LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE, "U"},
            {LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE,   "u"},
            {LATIN_CAPITAL_LETTER_U_WITH_OGONEK,       "U"},
            {LATIN_SMALL_LETTER_U_WITH_OGONEK,         "u"},
            {LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX,   "W"},
            {LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX,     "w"},
            {LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX,   "Y"},
            {LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX,     "y"},
            {LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS,    "Y"},
            {LATIN_CAPITAL_LETTER_Z_WITH_ACUTE,        "Z"},
            {LATIN_SMALL_LETTER_Z_WITH_ACUTE,          "z"},
            {LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE,    "Z"},
            {LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE,      "z"},
            {LATIN_CAPITAL_LETTER_Z_WITH_CARON,        "Z"},
            {LATIN_SMALL_LETTER_Z_WITH_CARON,          "z"},
            {LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW,  "S"},
            {LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW,    "s"},
            {LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW,  "T"},
            {LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW,    "t"},
            {LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE,    "B"},
            {LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE,      "b"},
            {LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE,    "D"},
            {LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE,      "d"},
            {LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE,    "F"},
            {LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE,      "f"},
            {LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE,    "M"},
            {LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE,      "m"},
            {LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE,    "P"},
            {LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE,      "p"},
            {LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE,    "S"},
            {LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE,      "s"},
            {LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE,    "T"},
            {LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE,      "t"},
            {LATIN_CAPITAL_LETTER_W_WITH_GRAVE,        "W"},
            {LATIN_SMALL_LETTER_W_WITH_GRAVE,          "w"},
            {LATIN_CAPITAL_LETTER_W_WITH_ACUTE,        "W"},
            {LATIN_SMALL_LETTER_W_WITH_ACUTE,          "w"},
            {LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS,    "W"},
            {LATIN_SMALL_LETTER_W_WITH_DIAERESIS,      "w"},
            {LATIN_CAPITAL_LETTER_Y_WITH_GRAVE,        "Y"},
            {LATIN_SMALL_LETTER_Y_WITH_GRAVE,          "y"},

            // Other characters which are replaced by a plain letter.
            {LATIN_SMALL_F_WITH_HOOK,                  "f"},
            {BLACKLETTER_CAPITAL_I,                    "I"},
            {SCRIPT_CAPITAL_P,                         "P"},
            {BLACKLETTER_CAPITAL_R,                    "R"},
        };

        // Load the table into the map, one entry at a time, in table order.
        for (const SuperClass::value_type& entry : accentTable) {
            insert(entry);
        }
    }
}


//----------------------------------------------------------------------------
// Map character => html entity.
// See http://www.w3.org/TR/html4/sgml/entities.html
//----------------------------------------------------------------------------

namespace {
    using namespace ts;
    // Singleton map: character => name of the corresponding HTML entity.
    // Names are stored bare, without the leading '&' and the trailing ';'.
    // The entries are grouped by kind of character; the map itself is
    // indexed by character value, so the order below is for readability only.
    MAP_SINGLETON(HTMLEntities, UChar, const char*);
    HTMLEntities::HTMLEntities() : SuperClass({
        // Quotes, ampersand and angle brackets.
        {QUOTATION_MARK, "quot"},
        {AMPERSAND, "amp"},
        {APOSTROPHE, "apos"},
        {LESS_THAN_SIGN, "lt"},
        {GREATER_THAN_SIGN, "gt"},
        // Punctuation, currency signs and miscellaneous symbols.
        {NO_BREAK_SPACE, "nbsp"},
        {INVERTED_EXCLAMATION_MARK, "iexcl"},
        {CENT_SIGN, "cent"},
        {POUND_SIGN, "pound"},
        {CURRENCY_SIGN, "curren"},
        {YEN_SIGN, "yen"},
        {BROKEN_BAR, "brvbar"},
        {SECTION_SIGN, "sect"},
        {DIAERESIS, "uml"},
        {COPYRIGHT_SIGN, "copy"},
        {FEMININE_ORDINAL_INDICATOR, "ordf"},
        {LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK, "laquo"},
        {NOT_SIGN, "not"},
        {SOFT_HYPHEN, "shy"},
        {REGISTERED_SIGN, "reg"},
        {MACRON, "macr"},
        {DEGREE_SIGN, "deg"},
        {PLUS_MINUS_SIGN, "plusmn"},
        {SUPERSCRIPT_TWO, "sup2"},
        {SUPERSCRIPT_THREE, "sup3"},
        {ACUTE_ACCENT, "acute"},
        {MICRO_SIGN, "micro"},
        {PILCROW_SIGN, "para"},
        {MIDDLE_DOT, "middot"},
        {CEDILLA, "cedil"},
        {SUPERSCRIPT_ONE, "sup1"},
        {MASCULINE_ORDINAL_INDICATOR, "ordm"},
        {RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK, "raquo"},
        {VULGAR_FRACTION_ONE_QUARTER, "frac14"},
        {VULGAR_FRACTION_ONE_HALF, "frac12"},
        {VULGAR_FRACTION_THREE_QUARTERS, "frac34"},
        {INVERTED_QUESTION_MARK, "iquest"},
        // Latin capital letters with diacritical marks, AE, ETH and THORN
        // (the multiplication sign is interleaved with them).
        {LATIN_CAPITAL_LETTER_A_WITH_GRAVE, "Agrave"},
        {LATIN_CAPITAL_LETTER_A_WITH_ACUTE, "Aacute"},
        {LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, "Acirc"},
        {LATIN_CAPITAL_LETTER_A_WITH_TILDE, "Atilde"},
        {LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, "Auml"},
        {LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, "Aring"},
        {LATIN_CAPITAL_LETTER_AE, "AElig"},
        {LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, "Ccedil"},
        {LATIN_CAPITAL_LETTER_E_WITH_GRAVE, "Egrave"},
        {LATIN_CAPITAL_LETTER_E_WITH_ACUTE, "Eacute"},
        {LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, "Ecirc"},
        {LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, "Euml"},
        {LATIN_CAPITAL_LETTER_I_WITH_GRAVE, "Igrave"},
        {LATIN_CAPITAL_LETTER_I_WITH_ACUTE, "Iacute"},
        {LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, "Icirc"},
        {LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, "Iuml"},
        {LATIN_CAPITAL_LETTER_ETH, "ETH"},
        {LATIN_CAPITAL_LETTER_N_WITH_TILDE, "Ntilde"},
        {LATIN_CAPITAL_LETTER_O_WITH_GRAVE, "Ograve"},
        {LATIN_CAPITAL_LETTER_O_WITH_ACUTE, "Oacute"},
        {LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, "Ocirc"},
        {LATIN_CAPITAL_LETTER_O_WITH_TILDE, "Otilde"},
        {LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, "Ouml"},
        {MULTIPLICATION_SIGN, "times"},
        {LATIN_CAPITAL_LETTER_O_WITH_STROKE, "Oslash"},
        {LATIN_CAPITAL_LETTER_U_WITH_GRAVE, "Ugrave"},
        {LATIN_CAPITAL_LETTER_U_WITH_ACUTE, "Uacute"},
        {LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, "Ucirc"},
        {LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, "Uuml"},
        {LATIN_CAPITAL_LETTER_Y_WITH_ACUTE, "Yacute"},
        {LATIN_CAPITAL_LETTER_THORN, "THORN"},
        // Latin small letters with diacritical marks, sharp s, ae, eth and thorn
        // (the division sign is interleaved with them).
        {LATIN_SMALL_LETTER_SHARP_S, "szlig"},
        {LATIN_SMALL_LETTER_A_WITH_GRAVE, "agrave"},
        {LATIN_SMALL_LETTER_A_WITH_ACUTE, "aacute"},
        {LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX, "acirc"},
        {LATIN_SMALL_LETTER_A_WITH_TILDE, "atilde"},
        {LATIN_SMALL_LETTER_A_WITH_DIAERESIS, "auml"},
        {LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, "aring"},
        {LATIN_SMALL_LETTER_AE, "aelig"},
        {LATIN_SMALL_LETTER_C_WITH_CEDILLA, "ccedil"},
        {LATIN_SMALL_LETTER_E_WITH_GRAVE, "egrave"},
        {LATIN_SMALL_LETTER_E_WITH_ACUTE, "eacute"},
        {LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX, "ecirc"},
        {LATIN_SMALL_LETTER_E_WITH_DIAERESIS, "euml"},
        {LATIN_SMALL_LETTER_I_WITH_GRAVE, "igrave"},
        {LATIN_SMALL_LETTER_I_WITH_ACUTE, "iacute"},
        {LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX, "icirc"},
        {LATIN_SMALL_LETTER_I_WITH_DIAERESIS, "iuml"},
        {LATIN_SMALL_LETTER_ETH, "eth"},
        {LATIN_SMALL_LETTER_N_WITH_TILDE, "ntilde"},
        {LATIN_SMALL_LETTER_O_WITH_GRAVE, "ograve"},
        {LATIN_SMALL_LETTER_O_WITH_ACUTE, "oacute"},
        {LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX, "ocirc"},
        {LATIN_SMALL_LETTER_O_WITH_TILDE, "otilde"},
        {LATIN_SMALL_LETTER_O_WITH_DIAERESIS, "ouml"},
        {DIVISION_SIGN, "divide"},
        {LATIN_SMALL_LETTER_O_WITH_STROKE, "oslash"},
        {LATIN_SMALL_LETTER_U_WITH_GRAVE, "ugrave"},
        {LATIN_SMALL_LETTER_U_WITH_ACUTE, "uacute"},
        {LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX, "ucirc"},
        {LATIN_SMALL_LETTER_U_WITH_DIAERESIS, "uuml"},
        {LATIN_SMALL_LETTER_Y_WITH_ACUTE, "yacute"},
        {LATIN_SMALL_LETTER_THORN, "thorn"},
        {LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, "yuml"},
        // Additional Latin letters and modifier characters.
        {LATIN_CAPITAL_LIGATURE_OE, "OElig"},
        {LATIN_SMALL_LIGATURE_OE, "oelig"},
        {LATIN_CAPITAL_LETTER_S_WITH_CARON, "Scaron"},
        {LATIN_SMALL_LETTER_S_WITH_CARON, "scaron"},
        {LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS, "Yuml"},
        {LATIN_SMALL_F_WITH_HOOK, "fnof"},
        {MODIFIER_LETTER_CIRCUMFLEX_ACCENT, "circ"},
        {SMALL_TILDE, "tilde"},
        // Greek capital letters.
        {GREEK_CAPITAL_LETTER_ALPHA, "Alpha"},
        {GREEK_CAPITAL_LETTER_BETA, "Beta"},
        {GREEK_CAPITAL_LETTER_GAMMA, "Gamma"},
        {GREEK_CAPITAL_LETTER_DELTA, "Delta"},
        {GREEK_CAPITAL_LETTER_EPSILON, "Epsilon"},
        {GREEK_CAPITAL_LETTER_ZETA, "Zeta"},
        {GREEK_CAPITAL_LETTER_ETA, "Eta"},
        {GREEK_CAPITAL_LETTER_THETA, "Theta"},
        {GREEK_CAPITAL_LETTER_IOTA, "Iota"},
        {GREEK_CAPITAL_LETTER_KAPPA, "Kappa"},
        {GREEK_CAPITAL_LETTER_LAMDA, "Lambda"},
        {GREEK_CAPITAL_LETTER_MU, "Mu"},
        {GREEK_CAPITAL_LETTER_NU, "Nu"},
        {GREEK_CAPITAL_LETTER_XI, "Xi"},
        {GREEK_CAPITAL_LETTER_OMICRON, "Omicron"},
        {GREEK_CAPITAL_LETTER_PI, "Pi"},
        {GREEK_CAPITAL_LETTER_RHO, "Rho"},
        {GREEK_CAPITAL_LETTER_SIGMA, "Sigma"},
        {GREEK_CAPITAL_LETTER_TAU, "Tau"},
        {GREEK_CAPITAL_LETTER_UPSILON, "Upsilon"},
        {GREEK_CAPITAL_LETTER_PHI, "Phi"},
        {GREEK_CAPITAL_LETTER_CHI, "Chi"},
        {GREEK_CAPITAL_LETTER_PSI, "Psi"},
        {GREEK_CAPITAL_LETTER_OMEGA, "Omega"},
        // Greek small letters and Greek symbol variants.
        {GREEK_SMALL_LETTER_ALPHA, "alpha"},
        {GREEK_SMALL_LETTER_BETA, "beta"},
        {GREEK_SMALL_LETTER_GAMMA, "gamma"},
        {GREEK_SMALL_LETTER_DELTA, "delta"},
        {GREEK_SMALL_LETTER_EPSILON, "epsilon"},
        {GREEK_SMALL_LETTER_ZETA, "zeta"},
        {GREEK_SMALL_LETTER_ETA, "eta"},
        {GREEK_SMALL_LETTER_THETA, "theta"},
        {GREEK_SMALL_LETTER_IOTA, "iota"},
        {GREEK_SMALL_LETTER_KAPPA, "kappa"},
        {GREEK_SMALL_LETTER_LAMDA, "lambda"},
        {GREEK_SMALL_LETTER_MU, "mu"},
        {GREEK_SMALL_LETTER_NU, "nu"},
        {GREEK_SMALL_LETTER_XI, "xi"},
        {GREEK_SMALL_LETTER_OMICRON, "omicron"},
        {GREEK_SMALL_LETTER_PI, "pi"},
        {GREEK_SMALL_LETTER_RHO, "rho"},
        {GREEK_SMALL_LETTER_FINAL_SIGMA, "sigmaf"},
        {GREEK_SMALL_LETTER_SIGMA, "sigma"},
        {GREEK_SMALL_LETTER_TAU, "tau"},
        {GREEK_SMALL_LETTER_UPSILON, "upsilon"},
        {GREEK_SMALL_LETTER_PHI, "phi"},
        {GREEK_SMALL_LETTER_CHI, "chi"},
        {GREEK_SMALL_LETTER_PSI, "psi"},
        {GREEK_SMALL_LETTER_OMEGA, "omega"},
        {GREEK_SMALL_LETTER_THETA_SYMBOL, "thetasym"},
        {GREEK_UPSILON_WITH_HOOK_SYMBOL, "upsih"},
        {GREEK_PI_SYMBOL, "piv"},
        // Spaces, joiners and directional marks, dashes, quotation marks
        // and other general punctuation, euro sign.
        {EN_SPACE, "ensp"},
        {EM_SPACE, "emsp"},
        {THIN_SPACE, "thinsp"},
        {ZERO_WIDTH_NON_JOINER, "zwnj"},
        {ZERO_WIDTH_JOINER, "zwj"},
        {LEFT_TO_RIGHT_MARK, "lrm"},
        {RIGHT_TO_LEFT_MARK, "rlm"},
        {EN_DASH, "ndash"},
        {EM_DASH, "mdash"},
        {LEFT_SINGLE_QUOTATION_MARK, "lsquo"},
        {RIGHT_SINGLE_QUOTATION_MARK, "rsquo"},
        {SINGLE_LOW_9_QUOTATION_MARK, "sbquo"},
        {LEFT_DOUBLE_QUOTATION_MARK, "ldquo"},
        {RIGHT_DOUBLE_QUOTATION_MARK, "rdquo"},
        {DOUBLE_LOW_9_QUOTATION_MARK, "bdquo"},
        {DAGGER, "dagger"},
        {DOUBLE_DAGGER, "Dagger"},
        {BULLET, "bull"},
        {HORIZONTAL_ELLIPSIS, "hellip"},
        {PER_MILLE_SIGN, "permil"},
        {PRIME, "prime"},
        {DOUBLE_PRIME, "Prime"},
        {SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK, "lsaquo"},
        {SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK, "rsaquo"},
        {OVERLINE, "oline"},
        {FRACTION_SLASH, "frasl"},
        {EURO_SIGN, "euro"},
        // Letter-like symbols.
        {BLACKLETTER_CAPITAL_I, "image"},
        {SCRIPT_CAPITAL_P, "weierp"},
        {BLACKLETTER_CAPITAL_R, "real"},
        {TRADE_MARK_SIGN, "trade"},
        {ALEF_SYMBOL, "alefsym"},
        // Arrows.
        {LEFTWARDS_ARROW, "larr"},
        {UPWARDS_ARROW, "uarr"},
        {RIGHTWARDS_ARROW, "rarr"},
        {DOWNWARDS_ARROW, "darr"},
        {LEFT_RIGHT_ARROW, "harr"},
        {DOWNWARDS_ARROW_WITH_CORNER_LEFTWARDS, "crarr"},
        {LEFTWARDS_DOUBLE_ARROW, "lArr"},
        {UPWARDS_DOUBLE_ARROW, "uArr"},
        {RIGHTWARDS_DOUBLE_ARROW, "rArr"},
        {DOWNWARDS_DOUBLE_ARROW, "dArr"},
        {LEFT_RIGHT_DOUBLE_ARROW, "hArr"},
        // Mathematical operators and relations.
        {FOR_ALL, "forall"},
        {PARTIAL_DIFFERENTIAL, "part"},
        {THERE_EXISTS, "exist"},
        {EMPTY_SET, "empty"},
        {NABLA, "nabla"},
        {ELEMENT_OF, "isin"},
        {NOT_AN_ELEMENT_OF, "notin"},
        {CONTAINS_AS_MEMBER, "ni"},
        {N_ARY_PRODUCT, "prod"},
        {N_ARY_SUMATION, "sum"},
        {MINUS_SIGN, "minus"},
        {ASTERISK_OPERATOR, "lowast"},
        {SQUARE_ROOT, "radic"},
        {PROPORTIONAL_TO, "prop"},
        {CHAR_INFINITY, "infin"},
        {ANGLE, "ang"},
        {LOGICAL_AND, "and"},
        {LOGICAL_OR, "or"},
        {INTERSECTION, "cap"},
        {UNION, "cup"},
        {INTEGRAL, "int"},
        {THEREFORE, "there4"},
        {TILDE_OPERATOR, "sim"},
        {APPROXIMATELY_EQUAL_TO, "cong"},
        {ALMOST_EQUAL_TO, "asymp"},
        {NOT_EQUAL_TO, "ne"},
        {IDENTICAL_TO, "equiv"},
        {LESS_THAN_OR_EQUAL_TO, "le"},
        {GREATER_THAN_OR_EQUAL_TO, "ge"},
        {SUBSET_OF, "sub"},
        {SUPERSET_OF, "sup"},
        {NOT_A_SUBSET_OF, "nsub"},
        {SUBSET_OF_OR_EQUAL_TO, "sube"},
        {SUPERSET_OF_OR_EQUAL_TO, "supe"},
        {CIRCLED_PLUS, "oplus"},
        {CIRCLED_TIMES, "otimes"},
        {UP_TACK, "perp"},
        {DOT_OPERATOR, "sdot"},
        // Ceilings, floors, angle brackets and lozenge.
        {LEFT_CEILING, "lceil"},
        {RIGHT_CEILING, "rceil"},
        {LEFT_FLOOR, "lfloor"},
        {RIGHT_FLOOR, "rfloor"},
        {LEFT_POINTING_ANGLE_BRACKET, "lang"},
        {RIGHT_POINTING_ANGLE_BRACKET, "rang"},
        {LOZENGE, "loz"},
        // Card suits.
        {BLACK_SPADE_SUIT, "spades"},
        {BLACK_CLUB_SUIT, "clubs"},
        {BLACK_HEART_SUIT, "hearts"},
        {BLACK_DIAMOND_SUIT, "diams"},
    }) {}
}

//----------------------------------------------------------------------------
// Map html entity => character.
//----------------------------------------------------------------------------

namespace {
    // Singleton map: HTML entity name (as stored in HTMLEntities, i.e. without
    // '&' and ';') => character. This is the reverse of HTMLEntities.
    MAP_SINGLETON(HTMLCharacters, std::string, ts::UChar);
    HTMLCharacters::HTMLCharacters() : SuperClass()
    {
        // Walk the forward table once and register each entry with its
        // key and value swapped.
        const HTMLEntities* const entities = HTMLEntities::Instance();
        for (const auto& entity : *entities) {
            insert(std::make_pair(entity.second, entity.first));
        }
    }
}


//----------------------------------------------------------------------------
// Map combined character => letter + diacritical mark pair.
// The value is a 32-bit combination of letter (16-bit MSB) and diacritical
// mark (16-bit LSB).
//----------------------------------------------------------------------------

// Pack a base letter (upper 16 bits) and a diacritical mark (lower 16 bits).
#define DIAC(letter, mark) ((static_cast<uint32_t>(letter) << 16) | static_cast<uint32_t>(mark))
// Extract the base letter from a packed value.
#define DIAC_LETTER(ui32) (static_cast<UChar>(((ui32) >> 16) & 0xFFFF))
// Extract the diacritical mark from a packed value.
#define DIAC_MARK(ui32)   (static_cast<UChar>((ui32) & 0xFFFF))

namespace {
    using namespace ts;
    // Singleton map: precombined character => DIAC(base letter, combining mark).
    // The map is indexed by character value, so the order of the entries
    // below is for readability only.
    MAP_SINGLETON(CombiningSequences, UChar, uint32_t);
    CombiningSequences::CombiningSequences() : SuperClass({
       // Latin capital letters with grave, acute, circumflex, tilde,
       // diaeresis, ring above, cedilla or stroke.
       {LATIN_CAPITAL_LETTER_A_WITH_GRAVE,           DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_GRAVE_ACCENT)},
       {LATIN_CAPITAL_LETTER_A_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_A_WITH_TILDE,           DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_TILDE)},
       {LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,       DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_DIAERESIS)},
       {LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,      DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_RING_ABOVE)},
       {LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_C, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_E_WITH_GRAVE,           DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_GRAVE_ACCENT)},
       {LATIN_CAPITAL_LETTER_E_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,       DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_DIAERESIS)},
       {LATIN_CAPITAL_LETTER_I_WITH_GRAVE,           DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_GRAVE_ACCENT)},
       {LATIN_CAPITAL_LETTER_I_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS,       DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_DIAERESIS)},
       {LATIN_CAPITAL_LETTER_N_WITH_TILDE,           DIAC(LATIN_CAPITAL_LETTER_N, COMBINING_TILDE)},
       {LATIN_CAPITAL_LETTER_O_WITH_GRAVE,           DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_GRAVE_ACCENT)},
       {LATIN_CAPITAL_LETTER_O_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_O_WITH_TILDE,           DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_TILDE)},
       {LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS,       DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_DIAERESIS)},
       {LATIN_CAPITAL_LETTER_O_WITH_STROKE,          DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_CAPITAL_LETTER_U_WITH_GRAVE,           DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_GRAVE_ACCENT)},
       {LATIN_CAPITAL_LETTER_U_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS,       DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_DIAERESIS)},
       {LATIN_CAPITAL_LETTER_Y_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_Y, COMBINING_ACUTE_ACCENT)},
       // Latin small letters with the same set of marks.
       {LATIN_SMALL_LETTER_A_WITH_GRAVE,             DIAC(LATIN_SMALL_LETTER_A, COMBINING_GRAVE_ACCENT)},
       {LATIN_SMALL_LETTER_A_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_A, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_A, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_A_WITH_TILDE,             DIAC(LATIN_SMALL_LETTER_A, COMBINING_TILDE)},
       {LATIN_SMALL_LETTER_A_WITH_DIAERESIS,         DIAC(LATIN_SMALL_LETTER_A, COMBINING_DIAERESIS)},
       {LATIN_SMALL_LETTER_A_WITH_RING_ABOVE,        DIAC(LATIN_SMALL_LETTER_A, COMBINING_RING_ABOVE)},
       {LATIN_SMALL_LETTER_C_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_C, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_E_WITH_GRAVE,             DIAC(LATIN_SMALL_LETTER_E, COMBINING_GRAVE_ACCENT)},
       {LATIN_SMALL_LETTER_E_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_E, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_E, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_E_WITH_DIAERESIS,         DIAC(LATIN_SMALL_LETTER_E, COMBINING_DIAERESIS)},
       {LATIN_SMALL_LETTER_I_WITH_GRAVE,             DIAC(LATIN_SMALL_LETTER_I, COMBINING_GRAVE_ACCENT)},
       {LATIN_SMALL_LETTER_I_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_I, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_I, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_I_WITH_DIAERESIS,         DIAC(LATIN_SMALL_LETTER_I, COMBINING_DIAERESIS)},
       {LATIN_SMALL_LETTER_N_WITH_TILDE,             DIAC(LATIN_SMALL_LETTER_N, COMBINING_TILDE)},
       {LATIN_SMALL_LETTER_O_WITH_GRAVE,             DIAC(LATIN_SMALL_LETTER_O, COMBINING_GRAVE_ACCENT)},
       {LATIN_SMALL_LETTER_O_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_O, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_O, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_O_WITH_TILDE,             DIAC(LATIN_SMALL_LETTER_O, COMBINING_TILDE)},
       {LATIN_SMALL_LETTER_O_WITH_DIAERESIS,         DIAC(LATIN_SMALL_LETTER_O, COMBINING_DIAERESIS)},
       {LATIN_SMALL_LETTER_O_WITH_STROKE,            DIAC(LATIN_SMALL_LETTER_O, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_SMALL_LETTER_U_WITH_GRAVE,             DIAC(LATIN_SMALL_LETTER_U, COMBINING_GRAVE_ACCENT)},
       {LATIN_SMALL_LETTER_U_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_U, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_U, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_U_WITH_DIAERESIS,         DIAC(LATIN_SMALL_LETTER_U, COMBINING_DIAERESIS)},
       {LATIN_SMALL_LETTER_Y_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_Y, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_Y_WITH_DIAERESIS,         DIAC(LATIN_SMALL_LETTER_Y, COMBINING_DIAERESIS)},
       // Further Latin letters, mostly as capital/small pairs, listed in
       // alphabetical order of the base letter (macron, breve, ogonek, caron,
       // dot above, double acute, etc.)
       {LATIN_CAPITAL_LETTER_A_WITH_MACRON,          DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_MACRON)},
       {LATIN_SMALL_LETTER_A_WITH_MACRON,            DIAC(LATIN_SMALL_LETTER_A, COMBINING_MACRON)},
       {LATIN_CAPITAL_LETTER_A_WITH_BREVE,           DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_BREVE)},
       {LATIN_SMALL_LETTER_A_WITH_BREVE,             DIAC(LATIN_SMALL_LETTER_A, COMBINING_BREVE)},
       {LATIN_CAPITAL_LETTER_A_WITH_OGONEK,          DIAC(LATIN_CAPITAL_LETTER_A, COMBINING_OGONEK)},
       {LATIN_SMALL_LETTER_A_WITH_OGONEK,            DIAC(LATIN_SMALL_LETTER_A, COMBINING_OGONEK)},
       {LATIN_CAPITAL_LETTER_C_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_C, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_C_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_C, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_C, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_C, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_C, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_C, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_C_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_C, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_C_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_C, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_D_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_D, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_D_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_D, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_D_WITH_STROKE,          DIAC(LATIN_CAPITAL_LETTER_D, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_SMALL_LETTER_D_WITH_STROKE,            DIAC(LATIN_SMALL_LETTER_D, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_CAPITAL_LETTER_E_WITH_MACRON,          DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_MACRON)},
       {LATIN_SMALL_LETTER_E_WITH_MACRON,            DIAC(LATIN_SMALL_LETTER_E, COMBINING_MACRON)},
       {LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_E, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_E_WITH_OGONEK,          DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_OGONEK)},
       {LATIN_SMALL_LETTER_E_WITH_OGONEK,            DIAC(LATIN_SMALL_LETTER_E, COMBINING_OGONEK)},
       {LATIN_CAPITAL_LETTER_E_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_E, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_E_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_E, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_G, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_G, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_G_WITH_BREVE,           DIAC(LATIN_CAPITAL_LETTER_G, COMBINING_BREVE)},
       {LATIN_SMALL_LETTER_G_WITH_BREVE,             DIAC(LATIN_SMALL_LETTER_G, COMBINING_BREVE)},
       {LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_G, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_G, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_G_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_G, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_G_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_G, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_H, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_H, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_H_WITH_STROKE,          DIAC(LATIN_CAPITAL_LETTER_H, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_SMALL_LETTER_H_WITH_STROKE,            DIAC(LATIN_SMALL_LETTER_H, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_CAPITAL_LETTER_I_WITH_TILDE,           DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_TILDE)},
       {LATIN_SMALL_LETTER_I_WITH_TILDE,             DIAC(LATIN_SMALL_LETTER_I, COMBINING_TILDE)},
       {LATIN_CAPITAL_LETTER_I_WITH_MACRON,          DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_MACRON)},
       {LATIN_SMALL_LETTER_I_WITH_MACRON,            DIAC(LATIN_SMALL_LETTER_I, COMBINING_MACRON)},
       {LATIN_CAPITAL_LETTER_I_WITH_OGONEK,          DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_OGONEK)},
       {LATIN_SMALL_LETTER_I_WITH_OGONEK,            DIAC(LATIN_SMALL_LETTER_I, COMBINING_OGONEK)},
       {LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_I, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_J, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_J, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_K_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_K, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_K_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_K, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_L_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_L, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_L_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_L, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_L_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_L, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_L_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_L, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_L_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_L, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_L_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_L, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_L_WITH_STROKE,          DIAC(LATIN_CAPITAL_LETTER_L, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_SMALL_LETTER_L_WITH_STROKE,            DIAC(LATIN_SMALL_LETTER_L, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_CAPITAL_LETTER_N_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_N, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_N_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_N, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_N_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_N, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_N_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_N, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_N_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_N, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_N_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_N, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_O_WITH_MACRON,          DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_MACRON)},
       {LATIN_SMALL_LETTER_O_WITH_MACRON,            DIAC(LATIN_SMALL_LETTER_O, COMBINING_MACRON)},
       {LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE,    DIAC(LATIN_CAPITAL_LETTER_O, COMBINING_DOUBLE_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE,      DIAC(LATIN_SMALL_LETTER_O, COMBINING_DOUBLE_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_R_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_R, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_R_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_R, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_R_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_R, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_R_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_R, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_R_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_R, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_R_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_R, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_S_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_S, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_S_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_S, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_S, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_S, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_S_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_S, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_S_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_S, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_S_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_S, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_S_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_S, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_T_WITH_CEDILLA,         DIAC(LATIN_CAPITAL_LETTER_T, COMBINING_CEDILLA)},
       {LATIN_SMALL_LETTER_T_WITH_CEDILLA,           DIAC(LATIN_SMALL_LETTER_T, COMBINING_CEDILLA)},
       {LATIN_CAPITAL_LETTER_T_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_T, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_T_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_T, COMBINING_CARON)},
       {LATIN_CAPITAL_LETTER_T_WITH_STROKE,          DIAC(LATIN_CAPITAL_LETTER_T, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_SMALL_LETTER_T_WITH_STROKE,            DIAC(LATIN_SMALL_LETTER_T, COMBINING_LONG_STROKE_OVERLAY)},
       {LATIN_CAPITAL_LETTER_U_WITH_TILDE,           DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_TILDE)},
       {LATIN_SMALL_LETTER_U_WITH_TILDE,             DIAC(LATIN_SMALL_LETTER_U, COMBINING_TILDE)},
       {LATIN_CAPITAL_LETTER_U_WITH_MACRON,          DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_MACRON)},
       {LATIN_SMALL_LETTER_U_WITH_MACRON,            DIAC(LATIN_SMALL_LETTER_U, COMBINING_MACRON)},
       {LATIN_CAPITAL_LETTER_U_WITH_BREVE,           DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_BREVE)},
       {LATIN_SMALL_LETTER_U_WITH_BREVE,             DIAC(LATIN_SMALL_LETTER_U, COMBINING_BREVE)},
       {LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE,      DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_RING_ABOVE)},
       {LATIN_SMALL_LETTER_U_WITH_RING_ABOVE,        DIAC(LATIN_SMALL_LETTER_U, COMBINING_RING_ABOVE)},
       {LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE,    DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_DOUBLE_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE,      DIAC(LATIN_SMALL_LETTER_U, COMBINING_DOUBLE_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_U_WITH_OGONEK,          DIAC(LATIN_CAPITAL_LETTER_U, COMBINING_OGONEK)},
       {LATIN_SMALL_LETTER_U_WITH_OGONEK,            DIAC(LATIN_SMALL_LETTER_U, COMBINING_OGONEK)},
       {LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_W, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_W, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX,      DIAC(LATIN_CAPITAL_LETTER_Y, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX,        DIAC(LATIN_SMALL_LETTER_Y, COMBINING_CIRCUMFLEX_ACCENT)},
       {LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS,       DIAC(LATIN_CAPITAL_LETTER_Y, COMBINING_DIAERESIS)},
       {LATIN_CAPITAL_LETTER_Z_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_Z, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_Z_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_Z, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_Z, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_Z, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_Z_WITH_CARON,           DIAC(LATIN_CAPITAL_LETTER_Z, COMBINING_CARON)},
       {LATIN_SMALL_LETTER_Z_WITH_CARON,             DIAC(LATIN_SMALL_LETTER_Z, COMBINING_CARON)},
       // Small f with hook, then S and T with comma below.
       {LATIN_SMALL_F_WITH_HOOK,                     DIAC(LATIN_SMALL_LETTER_F, COMBINING_HOOK_ABOVE)},
       {LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW,     DIAC(LATIN_CAPITAL_LETTER_S, COMBINING_COMMA_BELOW)},
       {LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW,       DIAC(LATIN_SMALL_LETTER_S, COMBINING_COMMA_BELOW)},
       {LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW,     DIAC(LATIN_CAPITAL_LETTER_T, COMBINING_COMMA_BELOW)},
       {LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW,       DIAC(LATIN_SMALL_LETTER_T, COMBINING_COMMA_BELOW)},
       // Greek letters with tonos.
       // NOTE(review): every entry of this group uses COMBINING_GREEK_DIALYTIKA_TONOS,
       // whether the character name says "WITH_TONOS" or "WITH_DIALYTIKA_AND_TONOS".
       // As a result, the two small iota entries share the same value, and so do
       // the two small upsilon entries. Confirm whether this is intended.
       {GREEK_CAPITAL_LETTER_ALPHA_WITH_TONOS,       DIAC(GREEK_CAPITAL_LETTER_ALPHA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_CAPITAL_LETTER_EPSILON_WITH_TONOS,     DIAC(GREEK_CAPITAL_LETTER_EPSILON, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_CAPITAL_LETTER_ETA_WITH_TONOS,         DIAC(GREEK_CAPITAL_LETTER_ETA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_CAPITAL_LETTER_IOTA_WITH_TONOS,        DIAC(GREEK_CAPITAL_LETTER_IOTA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_CAPITAL_LETTER_OMICRON_WITH_TONOS,     DIAC(GREEK_CAPITAL_LETTER_OMICRON, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_CAPITAL_LETTER_UPSILON_WITH_TONOS,     DIAC(GREEK_CAPITAL_LETTER_UPSILON, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_CAPITAL_LETTER_OMEGA_WITH_TONOS,       DIAC(GREEK_CAPITAL_LETTER_OMEGA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS, DIAC(GREEK_SMALL_LETTER_IOTA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_ALPHA_WITH_TONOS,         DIAC(GREEK_SMALL_LETTER_ALPHA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_EPSILON_WITH_TONOS,       DIAC(GREEK_SMALL_LETTER_EPSILON, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_ETA_WITH_TONOS,           DIAC(GREEK_SMALL_LETTER_ETA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_IOTA_WITH_TONOS,          DIAC(GREEK_SMALL_LETTER_IOTA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS, DIAC(GREEK_SMALL_LETTER_UPSILON, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_OMICRON_WITH_TONOS,       DIAC(GREEK_SMALL_LETTER_OMICRON, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_UPSILON_WITH_TONOS,       DIAC(GREEK_SMALL_LETTER_UPSILON, COMBINING_GREEK_DIALYTIKA_TONOS)},
       {GREEK_SMALL_LETTER_OMEGA_WITH_TONOS,         DIAC(GREEK_SMALL_LETTER_OMEGA, COMBINING_GREEK_DIALYTIKA_TONOS)},
       // Latin letters with dot above, then W and Y with grave, acute or diaeresis.
       {LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_B, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_B, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_D, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_D, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_F, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_F, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_M, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_M, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_P, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_P, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_S, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_S, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE,       DIAC(LATIN_CAPITAL_LETTER_T, COMBINING_DOT_ABOVE)},
       {LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE,         DIAC(LATIN_SMALL_LETTER_T, COMBINING_DOT_ABOVE)},
       {LATIN_CAPITAL_LETTER_W_WITH_GRAVE,           DIAC(LATIN_CAPITAL_LETTER_W, COMBINING_GRAVE_ACCENT)},
       {LATIN_SMALL_LETTER_W_WITH_GRAVE,             DIAC(LATIN_SMALL_LETTER_W, COMBINING_GRAVE_ACCENT)},
       {LATIN_CAPITAL_LETTER_W_WITH_ACUTE,           DIAC(LATIN_CAPITAL_LETTER_W, COMBINING_ACUTE_ACCENT)},
       {LATIN_SMALL_LETTER_W_WITH_ACUTE,             DIAC(LATIN_SMALL_LETTER_W, COMBINING_ACUTE_ACCENT)},
       {LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS,       DIAC(LATIN_CAPITAL_LETTER_W, COMBINING_DIAERESIS)},
       {LATIN_SMALL_LETTER_W_WITH_DIAERESIS,         DIAC(LATIN_SMALL_LETTER_W, COMBINING_DIAERESIS)},
       {LATIN_CAPITAL_LETTER_Y_WITH_GRAVE,           DIAC(LATIN_CAPITAL_LETTER_Y, COMBINING_GRAVE_ACCENT)},
       {LATIN_SMALL_LETTER_Y_WITH_GRAVE,             DIAC(LATIN_SMALL_LETTER_Y, COMBINING_GRAVE_ACCENT)},
    }) {}
}


//----------------------------------------------------------------------------
// Map letter + diacritical mark pair => combined character.
//----------------------------------------------------------------------------

namespace {
    using namespace ts;
    MAP_SINGLETON(CombiningCharacters, uint32_t, UChar);

    // Constructor: populate this map as the inverse of CombiningSequences.
    // CombiningSequences associates a combined character with its
    // letter + diacritical mark value; here that value becomes the key
    // and the combined character becomes the mapped value.
    CombiningCharacters::CombiningCharacters() : SuperClass()
    {
        const CombiningSequences& sequences = *CombiningSequences::Instance();
        for (const auto& entry : sequences) {
            // insert() leaves an already present key untouched, so the first
            // entry visited for a given sequence value is the one retained.
            insert(std::make_pair(entry.second, entry.first));
        }
    }
}


//----------------------------------------------------------------------------
// Map characters => characteristics.
//----------------------------------------------------------------------------

namespace {
    using namespace ts;
    MAP_SINGLETON(CharChar, UChar, uint32_t);
    CharChar::CharChar() : SuperClass({
        {CHAR_NULL,                                  0},
        {START_OF_HEADING,                           0},
        {START_OF_TEXT,                              0},
        {END_OF_TEXT,                                0},
        {END_OF_TRANSMISSION,                        0},
        {ENQUIRY,                                    0},
        {ACKNOWLEDGE,                                0},
        {BELL,                                       0},
        {BACKSPACE,                                  0},
        {HORIZONTAL_TABULATION,                      CCHAR_SPACE},
        {LINE_FEED,                                  CCHAR_SPACE},
        {VERTICAL_TABULATION,                        CCHAR_SPACE},
        {FORM_FEED,                                  CCHAR_SPACE},
        {CARRIAGE_RETURN,                            CCHAR_SPACE},
        {SHIFT_OUT,                                  0},
        {SHIFT_IN,                                   0},
        {DATA_LINK_ESCAPE,                           0},
        {DEVICE_CONTROL_ONE,                         0},
        {DEVICE_CONTROL_TWO,                         0},
        {DEVICE_CONTROL_THREE,                       0},
        {DEVICE_CONTROL_FOUR,                        0},
        {NEGATIVE_ACKNOWLEDGE,                       0},
        {SYNCHRONOUS_IDLE,                           0},
        {END_OF_TRANSMISSION_BLOCK,                  0},
        {CANCEL,                                     0},
        {END_OF_MEDIUM,                              0},
        {SUBSTITUTE,                                 0},
        {ESCAPE,                                     0},
        {FILE_SEPARATOR,                             0},
        {GROUP_SEPARATOR,                            0},
        {RECORD_SEPARATOR,                           0},
        {UNIT_SEPARATOR,                             0},
        {SPACE,                                      CCHAR_PRINT | CCHAR_SPACE},
        {EXCLAMATION_MARK,                           CCHAR_PRINT},
        {QUOTATION_MARK,                             CCHAR_PRINT},
        {NUMBER_SIGN,                                CCHAR_PRINT},
        {DOLLAR_SIGN,                                CCHAR_PRINT},
        {PERCENT_SIGN,                               CCHAR_PRINT},
        {AMPERSAND,                                  CCHAR_PRINT},
        {APOSTROPHE,                                 CCHAR_PRINT},
        {LEFT_PARENTHESIS,                           CCHAR_PRINT},
        {RIGHT_PARENTHESIS,                          CCHAR_PRINT},
        {ASTERISK,                                   CCHAR_PRINT},
        {PLUS_SIGN,                                  CCHAR_PRINT},
        {COMMA,                                      CCHAR_PRINT},
        {HYPHEN_MINUS,                               CCHAR_PRINT},
        {FULL_STOP,                                  CCHAR_PRINT},
        {SOLIDUS,                                    CCHAR_PRINT},
        {DIGIT_ZERO,                                 CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_ONE,                                  CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_TWO,                                  CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_THREE,                                CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_FOUR,                                 CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_FIVE,                                 CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_SIX,                                  CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_SEVEN,                                CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_EIGHT,                                CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {DIGIT_NINE,                                 CCHAR_PRINT | CCHAR_DIGIT | CCHAR_HEXA | CCHAR_LATIN},
        {COLON,                                      CCHAR_PRINT},
        {SEMICOLON,                                  CCHAR_PRINT},
        {LESS_THAN_SIGN,                             CCHAR_PRINT},
        {EQUALS_SIGN,                                CCHAR_PRINT},
        {GREATER_THAN_SIGN,                          CCHAR_PRINT},
        {QUESTION_MARK,                              CCHAR_PRINT},
        {COMMERCIAL_AT,                              CCHAR_PRINT},
        {LATIN_CAPITAL_LETTER_A,                     CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_B,                     CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_C,                     CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_D,                     CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E,                     CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_F,                     CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_G,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_H,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_J,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_K,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_L,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_M,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_N,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_P,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Q,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_R,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_S,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_T,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_V,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_W,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_X,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Y,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Z,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LEFT_SQUARE_BRACKET,                        CCHAR_PRINT},
        {REVERSE_SOLIDUS,                            CCHAR_PRINT},
        {RIGHT_SQUARE_BRACKET,                       CCHAR_PRINT},
        {CIRCUMFLEX_ACCENT,                          CCHAR_PRINT},
        {LOW_LINE,                                   CCHAR_PRINT},
        {GRAVE_ACCENT,                               CCHAR_PRINT | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A,                       CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_B,                       CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_C,                       CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_D,                       CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E,                       CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_F,                       CCHAR_PRINT | CCHAR_HEXA | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_G,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_H,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_J,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_K,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_L,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_M,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_N,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_P,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Q,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_R,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_S,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_T,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_V,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_W,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_X,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Y,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Z,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LEFT_CURLY_BRACKET,                         CCHAR_PRINT},
        {VERTICAL_LINE,                              CCHAR_PRINT},
        {RIGHT_CURLY_BRACKET,                        CCHAR_PRINT},
        {TILDE,                                      CCHAR_PRINT},
        {CHAR_DELETE,                                CCHAR_PRINT},
        {NO_BREAK_SPACE,                             CCHAR_PRINT | CCHAR_SPACE},
        {INVERTED_EXCLAMATION_MARK,                  CCHAR_PRINT | CCHAR_LATIN},
        {CENT_SIGN,                                  CCHAR_PRINT | CCHAR_LATIN},
        {POUND_SIGN,                                 CCHAR_PRINT | CCHAR_LATIN},
        {CURRENCY_SIGN,                              CCHAR_PRINT},
        {YEN_SIGN,                                   CCHAR_PRINT},
        {BROKEN_BAR,                                 CCHAR_PRINT},
        {SECTION_SIGN,                               CCHAR_PRINT},
        {DIAERESIS,                                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COPYRIGHT_SIGN,                             CCHAR_PRINT},
        {FEMININE_ORDINAL_INDICATOR,                 CCHAR_PRINT},
        {LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK,  CCHAR_PRINT | CCHAR_LATIN},
        {NOT_SIGN,                                   CCHAR_PRINT},
        {SOFT_HYPHEN,                                CCHAR_PRINT},
        {REGISTERED_SIGN,                            CCHAR_PRINT},
        {MACRON,                                     CCHAR_PRINT},
        {DEGREE_SIGN,                                CCHAR_PRINT},
        {PLUS_MINUS_SIGN,                            CCHAR_PRINT},
        {SUPERSCRIPT_TWO,                            CCHAR_PRINT},
        {SUPERSCRIPT_THREE,                          CCHAR_PRINT},
        {ACUTE_ACCENT,                               CCHAR_PRINT | CCHAR_CDIACRIT},
        {MICRO_SIGN,                                 CCHAR_PRINT | CCHAR_CDIACRIT},
        {PILCROW_SIGN,                               CCHAR_PRINT | CCHAR_CDIACRIT},
        {MIDDLE_DOT,                                 CCHAR_PRINT | CCHAR_CDIACRIT},
        {CEDILLA,                                    CCHAR_PRINT | CCHAR_CDIACRIT},
        {SUPERSCRIPT_ONE,                            CCHAR_PRINT},
        {MASCULINE_ORDINAL_INDICATOR,                CCHAR_PRINT},
        {RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK, CCHAR_PRINT | CCHAR_LATIN},
        {VULGAR_FRACTION_ONE_QUARTER,                CCHAR_PRINT},
        {VULGAR_FRACTION_ONE_HALF,                   CCHAR_PRINT},
        {VULGAR_FRACTION_THREE_QUARTERS,             CCHAR_PRINT},
        {INVERTED_QUESTION_MARK,                     CCHAR_PRINT | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_GRAVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_TILDE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_AE,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_GRAVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_GRAVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_ETH,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_N_WITH_TILDE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O_WITH_GRAVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O_WITH_TILDE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {MULTIPLICATION_SIGN,                        CCHAR_PRINT},
        {LATIN_CAPITAL_LETTER_O_WITH_STROKE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_GRAVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Y_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_THORN,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_SHARP_S,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_GRAVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_TILDE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_DIAERESIS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_RING_ABOVE,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_AE,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_C_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_GRAVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_DIAERESIS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I_WITH_GRAVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I_WITH_DIAERESIS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_ETH,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_N_WITH_TILDE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O_WITH_GRAVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O_WITH_TILDE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O_WITH_DIAERESIS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {DIVISION_SIGN,                              CCHAR_PRINT},
        {LATIN_SMALL_LETTER_O_WITH_STROKE,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_GRAVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_DIAERESIS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Y_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_THORN,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Y_WITH_DIAERESIS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_MACRON,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_MACRON,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_BREVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_BREVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_A_WITH_OGONEK,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_A_WITH_OGONEK,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_C_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_C_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_C_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_C_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_D_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_D_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_D_WITH_STROKE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_D_WITH_STROKE,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_MACRON,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_MACRON,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_OGONEK,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_OGONEK,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_E_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_E_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_G_WITH_BREVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_G_WITH_BREVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_G_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_G_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_H_WITH_STROKE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_H_WITH_STROKE,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_TILDE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I_WITH_TILDE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_MACRON,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I_WITH_MACRON,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_OGONEK,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_I_WITH_OGONEK,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_DOTLESS_I,               CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_K_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_K_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_KRA,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_L_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_L_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_L_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_L_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_L_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_L_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_L_WITH_STROKE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_L_WITH_STROKE,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_N_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_N_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_N_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_N_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_N_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_N_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_ENG,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_ENG,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O_WITH_MACRON,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O_WITH_MACRON,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE,   CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LIGATURE_OE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LIGATURE_OE,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_R_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_R_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_R_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_R_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_R_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_R_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_S_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_S_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_S_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_S_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_S_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_S_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_T_WITH_CEDILLA,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_T_WITH_CEDILLA,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_T_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_T_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_T_WITH_STROKE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_T_WITH_STROKE,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_TILDE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_TILDE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_MACRON,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_MACRON,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_BREVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_BREVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_RING_ABOVE,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE,   CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_U_WITH_OGONEK,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_U_WITH_OGONEK,           CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Z_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Z_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Z_WITH_CARON,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Z_WITH_CARON,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_F_WITH_HOOK,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW,    CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW,    CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {MODIFIER_LETTER_CIRCUMFLEX_ACCENT,          CCHAR_PRINT | CCHAR_LETTER},
        {CARON,                                      CCHAR_PRINT},
        {BREVE,                                      CCHAR_PRINT},
        {DOT_ABOVE,                                  CCHAR_PRINT},
        {OGONEK,                                     CCHAR_PRINT},
        {SMALL_TILDE,                                CCHAR_PRINT | CCHAR_LATIN},
        {DOUBLE_ACUTE_ACCENT,                        CCHAR_PRINT | CCHAR_LATIN},
        {COMBINING_GRAVE_ACCENT,                     CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LATIN},
        {COMBINING_ACUTE_ACCENT,                     CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LATIN},
        {COMBINING_CIRCUMFLEX_ACCENT,                CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LATIN},
        {COMBINING_TILDE,                            CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LATIN},
        {COMBINING_MACRON,                           CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_OVERLINE,                         CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_BREVE,                            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOT_ABOVE,                        CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DIAERESIS,                        CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_HOOK_ABOVE,                       CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RING_ABOVE,                       CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_ACUTE_ACCENT,              CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_CARON,                            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_VERTICAL_LINE_ABOVE,              CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_VERTICAL_LINE_ABOVE,       CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_GRAVE_ACCENT,              CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_CANDRABINDU,                      CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_INVERTED_BREVE,                   CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_TURNED_COMMA_ABOVE,               CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_COMMA_ABOVE,                      CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_REVERSED_COMMA_ABOVE,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_COMMA_ABOVE_RIGHT,                CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_GRAVE_ACCENT_BELOW,               CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_ACUTE_ACCENT_BELOW,               CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LEFT_TACK_BELOW,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RIGHT_TACK_BELOW,                 CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LEFT_ANGLE_ABOVE,                 CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_HORN,                             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LEFT_HALF_RING_BELOW,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_UP_TACK_BELOW,                    CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOWN_TACK_BELOW,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_PLUS_SIGN_BELOW,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_MINUS_SIGN_BELOW,                 CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_PALATALIZED_HOOK_BELOW,           CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RETROFLEX_HOOK_BELOW,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOT_BELOW,                        CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DIAERESIS_BELOW,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RING_BELOW,                       CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_COMMA_BELOW,                      CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_CEDILLA,                          CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LATIN},
        {COMBINING_OGONEK,                           CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_VERTICAL_LINE_BELOW,              CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_BRIDGE_BELOW,                     CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_INVERTED_DOUBLE_ARCH_BELOW,       CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_CARON_BELOW,                      CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_CIRCUMFLEX_ACCENT_BELOW,          CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_BREVE_BELOW,                      CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_INVERTED_BREVE_BELOW,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_TILDE_BELOW,                      CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_MACRON_BELOW,                     CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LOW_LINE,                         CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_LOW_LINE,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_TILDE_OVERLAY,                    CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_SHORT_STROKE_OVERLAY,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LONG_STROKE_OVERLAY,              CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_SHORT_SOLIDUS_OVERLAY,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LONG_SOLIDUS_OVERLAY,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RIGHT_HALF_RING_BELOW,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_INVERTED_BRIDGE_BELOW,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_SQUARE_BELOW,                     CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_SEAGULL_BELOW,                    CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_X_ABOVE,                          CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_VERTICAL_TILDE,                   CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_OVERLINE,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_GRAVE_TONE_MARK,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_ACUTE_TONE_MARK,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_GREEK_PERISPOMENI,                CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_GREEK},
        {COMBINING_GREEK_KORONIS,                    CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_GREEK},
        {COMBINING_GREEK_DIALYTIKA_TONOS,            CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_GREEK},
        {COMBINING_GREEK_YPOGEGRAMMENI,              CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_GREEK},
        {COMBINING_BRIDGE_ABOVE,                     CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_EQUALS_SIGN_BELOW,                CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_VERTICAL_LINE_BELOW,       CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LEFT_ANGLE_BELOW,                 CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_NOT_TILDE_ABOVE,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_HOMOTHETIC_ABOVE,                 CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_ALMOST_EQUAL_TO_ABOVE,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LEFT_RIGHT_ARROW_BELOW,           CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_UPWARDS_ARROW_BELOW,              CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_GRAPHEME_JOINER,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RIGHT_ARROWHEAD_ABOVE,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LEFT_HALF_RING_ABOVE,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_FERMATA,                          CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_X_BELOW,                          CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LEFT_ARROWHEAD_BELOW,             CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RIGHT_ARROWHEAD_BELOW,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RIGHT_ARROWHEAD_AND_UP_ARROWHEAD_BELOW, CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_RIGHT_HALF_RING_ABOVE,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOT_ABOVE_RIGHT,                  CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_ASTERISK_BELOW,                   CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_RING_BELOW,                CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_ZIGZAG_ABOVE,                     CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_BREVE_BELOW,               CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_BREVE,                     CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_MACRON,                    CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_MACRON_BELOW,              CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_TILDE,                     CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_INVERTED_BREVE,            CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_DOUBLE_RIGHTWARDS_ARROW_BELOW,    CCHAR_PRINT | CCHAR_CDIACRIT},
        {COMBINING_LATIN_SMALL_LETTER_A,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_E,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_I,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_O,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_U,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_C,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_D,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_H,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_M,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_R,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_T,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_V,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {COMBINING_LATIN_SMALL_LETTER_X,             CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_LETTER | CCHAR_LATIN},
        {GREEK_YPOGEGRAMMENI,                        CCHAR_PRINT | CCHAR_GREEK},
        {GREEK_TONOS,                                CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_GREEK},
        {GREEK_DIALYTIKA_TONOS,                      CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_ALPHA_WITH_TONOS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_EPSILON_WITH_TONOS,    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_ETA_WITH_TONOS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_IOTA_WITH_TONOS,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_OMICRON_WITH_TONOS,    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_UPSILON_WITH_TONOS,    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_OMEGA_WITH_TONOS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS, CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_ALPHA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_BETA,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_GAMMA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_DELTA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_EPSILON,               CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_ZETA,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_ETA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_THETA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_IOTA,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_KAPPA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_LAMDA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_MU,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_NU,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_XI,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_OMICRON,               CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_PI,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_RHO,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_SIGMA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_TAU,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_UPSILON,               CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_PHI,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_CHI,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_PSI,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_OMEGA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA,   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA, CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_ALPHA_WITH_TONOS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_EPSILON_WITH_TONOS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_ETA_WITH_TONOS,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_IOTA_WITH_TONOS,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS, CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_ALPHA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_BETA,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_GAMMA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_DELTA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_EPSILON,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_ZETA,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_ETA,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_THETA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_IOTA,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_KAPPA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_LAMDA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_MU,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_NU,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_XI,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_OMICRON,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_PI,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_RHO,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_FINAL_SIGMA,             CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_SIGMA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_TAU,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_UPSILON,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_PHI,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_CHI,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_PSI,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_OMEGA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA,     CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA,  CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_OMICRON_WITH_TONOS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_UPSILON_WITH_TONOS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_OMEGA_WITH_TONOS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_SMALL_LETTER_THETA_SYMBOL,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_UPSILON_WITH_HOOK_SYMBOL,             CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {GREEK_PI_SYMBOL,                            CCHAR_PRINT | CCHAR_LETTER | CCHAR_GREEK},
        {CYRILLIC_CAPITAL_LETTER_IO,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_DJE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_GJE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_UKRAINIAN_IE,       CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_DZE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_BYELORUSSIAN_UKRAINIAN_I, CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_YI,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_JE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_LJE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_NJE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_TSHE,               CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_KJE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_SHORT_U,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_DZHE,               CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_A,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_BE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_VE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_GHE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_DE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_IE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_ZHE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_ZE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_I,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_SHORT_I,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_KA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_EL,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_EM,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_EN,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_O,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_PE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_ER,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_ES,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_TE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_U,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_EF,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_HA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_TSE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_CHE,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_SHA,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_SHCHA,              CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_HARD_SIGN,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_YERU,               CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_SOFT_SIGN,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_E,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_YU,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_CAPITAL_LETTER_YA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_A,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_BE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_VE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_GHE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_DE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_IE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_ZHE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_ZE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_I,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_SHORT_I,              CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_KA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_EL,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_EM,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_EN,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_O,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_PE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_ER,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_ES,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_TE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_U,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_EF,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_HA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_TSE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_CHE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_SHA,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_SHCHA,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_HARD_SIGN,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_YERU,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_SOFT_SIGN,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_E,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_YU,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_YA,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_IO,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_DJE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_GJE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_UKRAINIAN_IE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_DZE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_BYELORUSSIAN_UKRAINIAN_I, CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_YI,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_JE,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_LJE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_NJE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_TSHE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_KJE,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_SHORT_U,              CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {CYRILLIC_SMALL_LETTER_DZHE,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_CYRILLIC},
        {HEBREW_LETTER_ALEF,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_BET,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_GIMEL,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_DALET,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_HE,                           CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_VAV,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_ZAYIN,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_HET,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_TET,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_YOD,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_FINAL_KAF,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_KAF,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_LAMED,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_FINAL_MEM,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_MEM,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_FINAL_NUN,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_NUN,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_SAMEKH,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_AYIN,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_FINAL_PE,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_PE,                           CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_FINAL_TSADI,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_TSADI,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_QOF,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_RESH,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_SHIN,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {HEBREW_LETTER_TAV,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_HEBREW},
        {ARABIC_COMMA,                               CCHAR_PRINT | CCHAR_ARABIC},
        {ARABIC_SEMICOLON,                           CCHAR_PRINT | CCHAR_ARABIC},
        {ARABIC_QUESTION_MARK,                       CCHAR_PRINT | CCHAR_ARABIC},
        {ARABIC_LETTER_HAMZA,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_ALEF_WITH_MADDA_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_ALEF_WITH_HAMZA_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_WAW_WITH_HAMZA_ABOVE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_ALEF_WITH_HAMZA_BELOW,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_YEH_WITH_HAMZA_ABOVE,         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_ALEF,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_BEH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_TEH_MARBUTA,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_TEH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_THEH,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_JEEM,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_HAH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_KHAH,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_DAL,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_THAL,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_REH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_ZAIN,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_SEEN,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_SHEEN,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_SAD,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_DAD,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_TAH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_ZAH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_AIN,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_GHAIN,                        CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_TATWEEL,                             CCHAR_PRINT | CCHAR_ARABIC},
        {ARABIC_LETTER_FEH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_QAF,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_KAF,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_LAM,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_MEEM,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_NOON,                         CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_HEH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_WAW,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_ALEF_MAKSURA,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_LETTER_YEH,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_ARABIC},
        {ARABIC_FATHATAN,                            CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {ARABIC_DAMMATAN,                            CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {ARABIC_KASRATAN,                            CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {ARABIC_FATHA,                               CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {ARABIC_DAMMA,                               CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {ARABIC_KASRA,                               CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {ARABIC_SHADDA,                              CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {ARABIC_SUKUN,                               CCHAR_PRINT | CCHAR_CDIACRIT | CCHAR_ARABIC},
        {THAI_CHARACTER_KO_KAI,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_KHO_KHAI,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_KHO_KHUAT,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_KHO_KHWAI,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_KHO_KHON,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_KHO_RAKHANG,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_NGO_NGU,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_CHO_CHAN,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_CHO_CHING,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_CHO_CHANG,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SO_SO,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_CHO_CHOE,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_YO_YING,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_DO_CHADA,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_TO_PATAK,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_THO_THAN,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_THO_NANGMONTHO,              CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_THO_PHUTHAO,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_NO_NEN,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_DO_DEK,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_TO_TAO,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_THO_THUNG,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_THO_THAHAN,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_THO_THONG,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_NO_NU,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_BO_BAIMAI,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_PO_PLA,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_PHO_PHUNG,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_FO_FA,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_PHO_PHAN,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_FO_FAN,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_PHO_SAMPHAO,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MO_MA,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_YO_YAK,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_RO_RUA,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_RU,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_LO_LING,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_LU,                          CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_WO_WAEN,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SO_SALA,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SO_RUSI,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SO_SUA,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_HO_HIP,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_LO_CHULA,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_O_ANG,                       CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_HO_NOKHUK,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_PAIYANNOI,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_A,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MAI_HAN_AKAT,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_AA,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_AM,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_I,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_II,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_UE,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_UEE,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_U,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_UU,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_PHINTHU,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CURRENCY_SYMBOL_BAHT,                  CCHAR_PRINT | CCHAR_THAI},
        {THAI_CHARACTER_SARA_E,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_AE,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_O,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_AI_MAIMUAN,             CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_SARA_AI_MAIMALAI,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_LAKKHANGYAO,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MAIYAMOK,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MAITAIKHU,                   CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MAI_EK,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MAI_THO,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MAI_TRI,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_MAI_CHATTAWA,                CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_THANTHAKHAT,                 CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_NIKHAHIT,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_YAMAKKAN,                    CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_FONGMAN,                     CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_DIGIT_ZERO,                            CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_ONE,                             CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_TWO,                             CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_THREE,                           CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_FOUR,                            CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_FIVE,                            CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_SIX,                             CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_SEVEN,                           CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_EIGHT,                           CCHAR_PRINT | CCHAR_THAI},
        {THAI_DIGIT_NINE,                            CCHAR_PRINT | CCHAR_THAI},
        {THAI_CHARACTER_ANGKHANKHU,                  CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {THAI_CHARACTER_KHOMUT,                      CCHAR_PRINT | CCHAR_LETTER | CCHAR_THAI},
        {LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_W_WITH_GRAVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_W_WITH_GRAVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_W_WITH_ACUTE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_W_WITH_ACUTE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS,      CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_W_WITH_DIAERESIS,        CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_CAPITAL_LETTER_Y_WITH_GRAVE,          CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {LATIN_SMALL_LETTER_Y_WITH_GRAVE,            CCHAR_PRINT | CCHAR_LETTER | CCHAR_LATIN},
        {EN_SPACE,                                   CCHAR_PRINT | CCHAR_SPACE},
        {EM_SPACE,                                   CCHAR_PRINT | CCHAR_SPACE},
        {THIN_SPACE,                                 CCHAR_PRINT | CCHAR_SPACE},
        {ZERO_WIDTH_NON_JOINER,                      0},
        {ZERO_WIDTH_JOINER,                          0},
        {LEFT_TO_RIGHT_MARK,                         CCHAR_CDIACRIT},
        {RIGHT_TO_LEFT_MARK,                         CCHAR_CDIACRIT},
        {EN_DASH,                                    CCHAR_PRINT},
        {EM_DASH,                                    CCHAR_PRINT},
        {HORIZONTAL_BAR,                             CCHAR_PRINT},
        {DOUBLE_LOW_LINE,                            CCHAR_PRINT},
        {LEFT_SINGLE_QUOTATION_MARK,                 CCHAR_PRINT},
        {RIGHT_SINGLE_QUOTATION_MARK,                CCHAR_PRINT},
        {SINGLE_LOW_9_QUOTATION_MARK,                CCHAR_PRINT},
        {LEFT_DOUBLE_QUOTATION_MARK,                 CCHAR_PRINT},
        {RIGHT_DOUBLE_QUOTATION_MARK,                CCHAR_PRINT},
        {DOUBLE_LOW_9_QUOTATION_MARK,                CCHAR_PRINT},
        {DAGGER,                                     CCHAR_PRINT},
        {DOUBLE_DAGGER,                              CCHAR_PRINT},
        {BULLET,                                     CCHAR_PRINT},
        {HORIZONTAL_ELLIPSIS,                        CCHAR_PRINT},
        {PER_MILLE_SIGN,                             CCHAR_PRINT},
        {PRIME,                                      CCHAR_PRINT},
        {DOUBLE_PRIME,                               CCHAR_PRINT},
        {SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK,  CCHAR_PRINT},
        {SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK, CCHAR_PRINT},
        {OVERLINE,                                   CCHAR_PRINT},
        {FRACTION_SLASH,                             CCHAR_PRINT},
        {EURO_SIGN,                                  CCHAR_PRINT},
        {DRACHMA_SIGN,                               CCHAR_PRINT},
        {BLACKLETTER_CAPITAL_I,                      CCHAR_PRINT | CCHAR_LETTER},
        {NUMERO_SIGN,                                CCHAR_PRINT},
        {SCRIPT_CAPITAL_P,                           CCHAR_PRINT | CCHAR_LETTER},
        {BLACKLETTER_CAPITAL_R,                      CCHAR_PRINT | CCHAR_LETTER},
        {TRADE_MARK_SIGN,                            CCHAR_PRINT},
        {ALEF_SYMBOL,                                CCHAR_PRINT},
        {LEFTWARDS_ARROW,                            CCHAR_PRINT},
        {UPWARDS_ARROW,                              CCHAR_PRINT},
        {RIGHTWARDS_ARROW,                           CCHAR_PRINT},
        {DOWNWARDS_ARROW,                            CCHAR_PRINT},
        {LEFT_RIGHT_ARROW,                           CCHAR_PRINT},
        {DOWNWARDS_ARROW_WITH_CORNER_LEFTWARDS,      CCHAR_PRINT},
        {LEFTWARDS_DOUBLE_ARROW,                     CCHAR_PRINT},
        {UPWARDS_DOUBLE_ARROW,                       CCHAR_PRINT},
        {RIGHTWARDS_DOUBLE_ARROW,                    CCHAR_PRINT},
        {DOWNWARDS_DOUBLE_ARROW,                     CCHAR_PRINT},
        {LEFT_RIGHT_DOUBLE_ARROW,                    CCHAR_PRINT},
        {FOR_ALL,                                    CCHAR_PRINT},
        {PARTIAL_DIFFERENTIAL,                       CCHAR_PRINT},
        {THERE_EXISTS,                               CCHAR_PRINT},
        {EMPTY_SET,                                  CCHAR_PRINT},
        {NABLA,                                      CCHAR_PRINT},
        {ELEMENT_OF,                                 CCHAR_PRINT},
        {NOT_AN_ELEMENT_OF,                          CCHAR_PRINT},
        {CONTAINS_AS_MEMBER,                         CCHAR_PRINT},
        {N_ARY_PRODUCT,                              CCHAR_PRINT},
        {N_ARY_SUMATION,                             CCHAR_PRINT},
        {MINUS_SIGN,                                 CCHAR_PRINT},
        {ASTERISK_OPERATOR,                          CCHAR_PRINT},
        {SQUARE_ROOT,                                CCHAR_PRINT},
        {PROPORTIONAL_TO,                            CCHAR_PRINT},
        {CHAR_INFINITY,                              CCHAR_PRINT},
        {ANGLE,                                      CCHAR_PRINT},
        {LOGICAL_AND,                                CCHAR_PRINT},
        {LOGICAL_OR,                                 CCHAR_PRINT},
        {INTERSECTION,                               CCHAR_PRINT},
        {UNION,                                      CCHAR_PRINT},
        {INTEGRAL,                                   CCHAR_PRINT},
        {THEREFORE,                                  CCHAR_PRINT},
        {TILDE_OPERATOR,                             CCHAR_PRINT},
        {APPROXIMATELY_EQUAL_TO,                     CCHAR_PRINT},
        {ALMOST_EQUAL_TO,                            CCHAR_PRINT},
        {NOT_EQUAL_TO,                               CCHAR_PRINT},
        {IDENTICAL_TO,                               CCHAR_PRINT},
        {LESS_THAN_OR_EQUAL_TO,                      CCHAR_PRINT},
        {GREATER_THAN_OR_EQUAL_TO,                   CCHAR_PRINT},
        {SUBSET_OF,                                  CCHAR_PRINT},
        {SUPERSET_OF,                                CCHAR_PRINT},
        {NOT_A_SUBSET_OF,                            CCHAR_PRINT},
        {SUBSET_OF_OR_EQUAL_TO,                      CCHAR_PRINT},
        {SUPERSET_OF_OR_EQUAL_TO,                    CCHAR_PRINT},
        {CIRCLED_PLUS,                               CCHAR_PRINT},
        {CIRCLED_TIMES,                              CCHAR_PRINT},
        {UP_TACK,                                    CCHAR_PRINT},
        {DOT_OPERATOR,                               CCHAR_PRINT},
        {LEFT_CEILING,                               CCHAR_PRINT},
        {RIGHT_CEILING,                              CCHAR_PRINT},
        {LEFT_FLOOR,                                 CCHAR_PRINT},
        {RIGHT_FLOOR,                                CCHAR_PRINT},
        {LEFT_POINTING_ANGLE_BRACKET,                CCHAR_PRINT},
        {RIGHT_POINTING_ANGLE_BRACKET,               CCHAR_PRINT},
        {LOZENGE,                                    CCHAR_PRINT},
        {BLACK_SPADE_SUIT,                           CCHAR_PRINT},
        {BLACK_CLUB_SUIT,                            CCHAR_PRINT},
        {BLACK_HEART_SUIT,                           CCHAR_PRINT},
        {BLACK_DIAMOND_SUIT,                         CCHAR_PRINT},
    }) {}
}

// Get the characteristics bitmask of a character.
// Return the value registered for the character in the CharChar table,
// or zero when the character is not in the table.
uint32_t ts::UCharacteristics(UChar c)
{
    const CharChar* table = CharChar::Instance();
    const auto entry = table->find(c);
    if (entry == table->end()) {
        // Unknown character, no characteristics.
        return 0;
    }
    return entry->second;
}


//----------------------------------------------------------------------------
// Character conversions.
//----------------------------------------------------------------------------

// Convert a character into its digit value in a given base.
// '0'-'9' are valued 0-9, 'a'-'z' and 'A'-'Z' are valued 10-35.
// Return defaultValue when the character is none of these or when
// its value is not lower than the base.
int ts::ToDigit(UChar c, int base, int defaultValue)
{
    int value = -1;
    if (c >= UChar('A') && c <= UChar('Z')) {
        value = 10 + c - UChar('A');
    }
    else if (c >= UChar('a') && c <= UChar('z')) {
        value = 10 + c - UChar('a');
    }
    else if (c >= UChar('0') && c <= UChar('9')) {
        value = c - UChar('0');
    }

    // Reject non-digit characters and digits which do not exist in that base.
    if (value < 0 || value >= base) {
        return defaultValue;
    }
    return value;
}

// Check if a character is a lowercase letter.
bool ts::IsLower(UChar c)
{
    // Trust the standard function first. When it does not recognize the
    // character, check if it is a key of our own lowercase-to-uppercase table.
    return std::iswlower(wint_t(c)) != 0 || Contains(*LowerUpper::Instance(), c);
}

// Check if a character is an uppercase letter.
bool ts::IsUpper(UChar c)
{
    // Trust the standard function first. When it does not recognize the
    // character, check if it is a key of our own uppercase-to-lowercase table.
    return std::iswupper(wint_t(c)) != 0 || Contains(*UpperLower::Instance(), c);
}

// Convert a character to lowercase.
// Return the character unchanged when no translation is known.
ts::UChar ts::ToLower(UChar c)
{
    // Give priority to the translation from the standard function, if any.
    const UChar stdLower = UChar(std::towlower(wint_t(c)));
    if (stdLower != c) {
        return stdLower;
    }

    // Otherwise, fallback to our additional uppercase-to-lowercase table.
    const UpperLower* table = UpperLower::Instance();
    const auto entry = table->find(c);
    if (entry != table->end()) {
        return entry->second;
    }
    return c;
}

// Convert a character to uppercase.
// Return the character unchanged when no translation is known.
ts::UChar ts::ToUpper(UChar c)
{
    // Give priority to the translation from the standard function, if any.
    const UChar stdUpper = UChar(std::towupper(wint_t(c)));
    if (stdUpper != c) {
        return stdUpper;
    }

    // Otherwise, fallback to our additional lowercase-to-uppercase table.
    const LowerUpper* table = LowerUpper::Instance();
    const auto entry = table->find(c);
    if (entry != table->end()) {
        return entry->second;
    }
    return c;
}

// Check if a character is a key of the WithoutAccent table,
// i.e. if RemoveAccent() has a replacement sequence for it.
bool ts::IsAccented(UChar c)
{
    const WithoutAccent* table = WithoutAccent::Instance();
    return table->find(c) != table->end();
}

// Get the replacement string of a character from the WithoutAccent table.
// Return a one-character string containing the character itself when
// the character is not in the table.
ts::UString ts::RemoveAccent(UChar c)
{
    const WithoutAccent* table = WithoutAccent::Instance();
    const auto entry = table->find(c);
    if (entry != table->end()) {
        // The table stores the replacement sequence as UTF-8.
        return ts::UString::FromUTF8(entry->second);
    }
    return ts::UString(1, c);
}

// Convert a character into its HTML representation.
// Return "&name;" when the character has an entry in the HTMLEntities table
// or a one-character string containing the character itself otherwise.
ts::UString ts::ToHTML(UChar c)
{
    const HTMLEntities* entities = HTMLEntities::Instance();
    const auto entry = entities->find(c);
    if (entry == entities->end()) {
        // No known HTML entity for this character.
        return ts::UString(1, c);
    }
    // The table stores the entity name as UTF-8, without '&' and ';'.
    return ts::UChar('&') + ts::UString::FromUTF8(entry->second) + ts::UChar(';');
}

// Convert an HTML entity name into the corresponding character.
// The name is looked up in the HTMLCharacters table using its UTF-8 form.
// Return CHAR_NULL when the name is not in the table.
ts::UChar ts::FromHTML(const UString& entity)
{
    const HTMLCharacters* characters = HTMLCharacters::Instance();
    const auto entry = characters->find(entity.toUTF8());
    if (entry != characters->end()) {
        return entry->second;
    }
    return CHAR_NULL;
}

void ts::UString::convertToHTML(const UString& convert)
{
    // Should not be there, but this is much faster to do it that way.
    const bool convertAll = convert.empty();
    const HTMLEntities* he = HTMLEntities::Instance();
    for (size_type i = 0; i < length(); ) {
        if (!convertAll && convert.find(at(i)) == NPOS) {
            // Do not convert this one.
            ++i;
        }
        else {
            // Look for an HTML entity.
            const HTMLEntities::const_iterator it(he->find(at(i)));
            if (it == he->end()) {
                // No HTML entity for this character, don't convert.
                ++i;
            }
            else {
                // Replace the character with the HTML entity.
                const UString rep(UString::FromUTF8(it->second));
                at(i) = ts::AMPERSAND;
                insert(i + 1, rep);
                insert(i + 1 + rep.length(), 1, ts::SEMICOLON);
                i += rep.length() + 2;
            }
        }
    }
}

void ts::UString::convertFromHTML()
{
    // Should not be there, but this is much faster to do it that way.
    const HTMLCharacters* hc = HTMLCharacters::Instance();
    for (size_type i = 0; i < length(); ) {

        // Find next "&...;" sequence.
        const size_type amp = find(u'&', i);
        if (amp == NPOS) {
            // No more sequence, conversion is over.
            return;
        }
        const size_type semi = find(u';', amp + 1);
        if (semi == NPOS) {
            // Sequence not terminated, invalid, do not modify.
            return;
        }

        // Sequence found, locate character translation.
        assert(semi > amp);
        const HTMLCharacters::const_iterator it(hc->find(substr(amp + 1, semi - amp - 1).toUTF8()));
        if (it == hc->end()) {
            // Unknown sequence, leave it as is.
            i = semi + 1;
        }
        else {
            // Replace the sequence by the character.
            at(amp) = it->second;
            erase(amp + 1, semi - amp);
            i = amp + 1;
        }
    }
}


//----------------------------------------------------------------------------
// Check two characters match, case sensitive or insensitive.
//----------------------------------------------------------------------------

// Compare two characters according to a case sensitivity policy.
// Case insensitive comparison is done on the uppercase form of the characters.
bool ts::Match(UChar c1, UChar c2, CaseSensitivity cs)
{
    if (cs == CASE_SENSITIVE) {
        return c1 == c2;
    }
    if (cs == CASE_INSENSITIVE) {
        return ToUpper(c1) == ToUpper(c2);
    }
    // Invalid case sensitivity value, never match.
    return false;
}


//----------------------------------------------------------------------------
// Decompose a precombined character into its base letter and non-spacing
// diacritical mark.
//----------------------------------------------------------------------------

// Look up a character in the CombiningSequences table. When found, return
// true and set 'letter' and 'mark' from the table entry. When not found,
// return false and leave 'letter' and 'mark' unmodified.
bool ts::DecomposePrecombined(UChar c, UChar& letter, UChar& mark)
{
    const CombiningSequences* sequences = CombiningSequences::Instance();
    const auto entry = sequences->find(c);
    if (entry == sequences->end()) {
        // Not a known precombined character.
        return false;
    }

    // The table value packs the base letter and the diacritical mark.
    letter = DIAC_LETTER(entry->second);
    mark = DIAC_MARK(entry->second);
    return true;
}


//----------------------------------------------------------------------------
// Build a precombined character from its base letter and non-spacing
// diacritical mark.
//----------------------------------------------------------------------------

// Look up the combination of a base letter and a diacritical mark in the
// CombiningCharacters table and return the associated character.
// Return CHAR_NULL when the combination is not in the table.
ts::UChar ts::Precombined(UChar letter, UChar mark)
{
    const CombiningCharacters* characters = CombiningCharacters::Instance();
    const auto entry = characters->find(DIAC(letter, mark));
    if (entry != characters->end()) {
        return entry->second;
    }
    return CHAR_NULL;
}
